From: Michael Zuckerman
Date: Wed, 25 May 2016 14:04:21 +0000 (+0000)
Subject: [Clang][AVX512][BUILTIN] Add missing intrinsics for cast
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=1c3b22987674a09aeb2d7dcc65cfc2d2d722e39b;p=clang

[Clang][AVX512][BUILTIN] Add missing intrinsics for cast

Differential Revision: http://reviews.llvm.org/D20523

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@270699 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Headers/avx512fintrin.h b/lib/Headers/avx512fintrin.h
index 57a1de00b2..297c9b873d 100644
--- a/lib/Headers/avx512fintrin.h
+++ b/lib/Headers/avx512fintrin.h
@@ -337,12 +337,35 @@ _mm512_castpd512_pd128(__m512d __a)
   return __builtin_shufflevector(__a, __a, 0, 1);
 }
 
+static __inline __m256d __DEFAULT_FN_ATTRS
+_mm512_castpd512_pd256 (__m512d __A)
+{
+  return __builtin_shufflevector(__A, __A, 0, 1, 2, 3);
+}
+
 static __inline __m128 __DEFAULT_FN_ATTRS
 _mm512_castps512_ps128(__m512 __a)
 {
   return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
 }
 
+static __inline __m256 __DEFAULT_FN_ATTRS
+_mm512_castps512_ps256 (__m512 __A)
+{
+  return __builtin_shufflevector(__A, __A, 0, 1, 2, 3, 4, 5, 6, 7);
+}
+
+static __inline __m512 __DEFAULT_FN_ATTRS
+_mm512_castpd_ps (__m512d __A)
+{
+  return (__m512) (__A);
+}
+
+static __inline __m512i __DEFAULT_FN_ATTRS
+_mm512_castpd_si512 (__m512d __A)
+{
+  return (__m512i) (__A);
+}
 
 static __inline__ __m512d __DEFAULT_FN_ATTRS
 _mm512_castpd128_pd512 (__m128d __A)
@@ -350,6 +373,18 @@ _mm512_castpd128_pd512 (__m128d __A)
   return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
 }
 
+static __inline __m512d __DEFAULT_FN_ATTRS
+_mm512_castps_pd (__m512 __A)
+{
+  return (__m512d) (__A);
+}
+
+static __inline __m512i __DEFAULT_FN_ATTRS
+_mm512_castps_si512 (__m512 __A)
+{
+  return (__m512i) (__A);
+}
+
 static __inline__ __m512 __DEFAULT_FN_ATTRS
 _mm512_castps128_ps512 (__m128 __A)
 {
@@ -368,6 +403,24 @@ _mm512_castsi256_si512 (__m256i __A)
   return __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1);
 }
 
+static __inline __m512 __DEFAULT_FN_ATTRS
+_mm512_castsi512_ps (__m512i __A)
+{
+  return (__m512) (__A);
+}
+
+static __inline __m512d __DEFAULT_FN_ATTRS
+_mm512_castsi512_pd (__m512i __A)
+{
+  return (__m512d) (__A);
+}
+
+static __inline __m128i __DEFAULT_FN_ATTRS
+_mm512_castsi512_si128 (__m512i __A)
+{
+  return (__m128i)__builtin_shufflevector(__A, __A , 0, 1);
+}
+
 /* Bitwise operators */
 static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_and_epi32(__m512i __a, __m512i __b)
diff --git a/test/CodeGen/avx512f-builtins.c b/test/CodeGen/avx512f-builtins.c
index b98be5ff03..2100507715 100644
--- a/test/CodeGen/avx512f-builtins.c
+++ b/test/CodeGen/avx512f-builtins.c
@@ -328,13 +328,6 @@ __m512d test_mm512_set1_pd(double d)
   return _mm512_set1_pd(d);
 }
 
-__m512d test_mm512_castpd256_pd512(__m256d a)
-{
-  // CHECK-LABEL: @test_mm512_castpd256_pd512
-  // CHECK: shufflevector <4 x double> {{.*}}
-  return _mm512_castpd256_pd512(a);
-}
-
 __mmask16 test_mm512_knot(__mmask16 a)
 {
   // CHECK-LABEL: @test_mm512_knot
@@ -5925,10 +5918,25 @@ __m256i test_mm512_maskz_cvttpd_epu32(__mmask8 __U, __m512d __A) {
   return _mm512_maskz_cvttpd_epu32(__U, __A);
 }
 
-__m512d test_mm512_castpd128_pd512(__m128d __A) {
-  // CHECK-LABEL: @test_mm512_castpd128_pd512
-  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <8 x i32>
-  return _mm512_castpd128_pd512(__A);
+__m512 test_mm512_castpd_ps (__m512d __A)
+{
+  // CHECK-LABEL: @test_mm512_castpd_ps
+  // CHECK: bitcast <8 x double> %1 to <16 x float>
+  return _mm512_castpd_ps (__A);
+}
+
+__m512d test_mm512_castps_pd (__m512 __A)
+{
+  // CHECK-LABEL: @test_mm512_castps_pd
+  // CHECK: bitcast <16 x float> %1 to <8 x double>
+  return _mm512_castps_pd (__A);
+}
+
+__m512i test_mm512_castpd_si512 (__m512d __A)
+{
+  // CHECK-LABEL: @test_mm512_castpd_si512
+  // CHECK: bitcast <8 x double> %1 to <8 x i64>
+  return _mm512_castpd_si512 (__A);
 }
 
 __m512 test_mm512_castps128_ps512(__m128 __A) {
@@ -5937,6 +5945,39 @@ __m512 test_mm512_castps128_ps512(__m128 __A) {
   return _mm512_castps128_ps512(__A);
 }
 
+__m512d test_mm512_castpd128_pd512(__m128d __A) {
+  // CHECK-LABEL: @test_mm512_castpd128_pd512
+  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <8 x i32>
+  return _mm512_castpd128_pd512(__A);
+}
+
+__m512d test_mm512_castpd256_pd512(__m256d a)
+{
+  // CHECK-LABEL: @test_mm512_castpd256_pd512
+  // CHECK: shufflevector <4 x double> {{.*}}
+  return _mm512_castpd256_pd512(a);
+}
+
+__m256d test_mm512_castpd512_pd256 (__m512d __A)
+{
+  // CHECK-LABEL: @test_mm512_castpd512_pd256
+  // CHECK: shufflevector <8 x double> %1, <8 x double> %2, <4 x i32>
+  return _mm512_castpd512_pd256 (__A);
+}
+
+__m256 test_mm512_castps512_ps256 (__m512 __A)
+{
+  // CHECK-LABEL: @test_mm512_castps512_ps256
+  // CHECK: shufflevector <16 x float> %1, <16 x float> %2, <8 x i32>
+  return _mm512_castps512_ps256 (__A);
+}
+
+__m512i test_mm512_castps_si512 (__m512 __A)
+{
+  // CHECK-LABEL: @test_mm512_castps_si512
+  // CHECK: bitcast <16 x float> %1 to <8 x i64>
+  return _mm512_castps_si512 (__A);
+}
 __m512i test_mm512_castsi128_si512(__m128i __A) {
   // CHECK-LABEL: @test_mm512_castsi128_si512
   // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <8 x i32>
@@ -5949,6 +5990,26 @@ __m512i test_mm512_castsi256_si512(__m256i __A) {
   return _mm512_castsi256_si512(__A);
 }
 
+__m512 test_mm512_castsi512_ps (__m512i __A)
+{
+  // CHECK-LABEL: @test_mm512_castsi512_ps
+  // CHECK: bitcast <8 x i64> %1 to <16 x float>
+  return _mm512_castsi512_ps (__A);
+}
+
+__m512d test_mm512_castsi512_pd (__m512i __A)
+{
+  // CHECK-LABEL: @test_mm512_castsi512_pd
+  // CHECK: bitcast <8 x i64> %1 to <8 x double>
+  return _mm512_castsi512_pd (__A);
+}
+
+__m128i test_mm512_castsi512_si128 (__m512i __A)
+{
+  // CHECK-LABEL: @test_mm512_castsi512_si128
+  // CHECK: shufflevector <8 x i64> %1, <8 x i64> %2, <2 x i32>
+  return _mm512_castsi512_si128 (__A);
+}
 __m128 test_mm_cvt_roundsd_ss(__m128 __A, __m128d __B) {
   // CHECK-LABEL: @test_mm_cvt_roundsd_ss