From: Craig Topper
Date: Sun, 10 Jul 2016 05:57:21 +0000 (+0000)
Subject: [X86] Use __builtin_shufflevector for 512-bit shufps intrinsics.
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=2d0926345d5edc88384bf1516ef6dc694c4cb6c9;p=clang

[X86] Use __builtin_shufflevector for 512-bit shufps intrinsics.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@275012 91177308-0d34-0410-b5e6-96231b3b80d8
---
Review notes (not part of the commit message):
- _mm512_shuffle_ps casts its result to __m512 rather than __m512d: the macro
  shuffles two __v16sf operands and the tests below return it as __m512.
- The shufflevector CHECK lines spell out the index vector the macro yields
  for imm = 4; the text after "<16 x i32>" was missing from this copy.
- Indentation was re-flowed; whitespace runs were collapsed in this copy.

diff --git a/lib/Headers/avx512fintrin.h b/lib/Headers/avx512fintrin.h
index c85c793bf4..e70d751688 100644
--- a/lib/Headers/avx512fintrin.h
+++ b/lib/Headers/avx512fintrin.h
@@ -7181,23 +7181,35 @@ _mm_maskz_scalef_ss (__mmask8 __U, __m128 __A, __m128 __B)
                                        (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
                                        (__v8df)_mm512_setzero_pd()); })
 
-#define _mm512_shuffle_ps(M, V, imm) __extension__ ({ \
-  (__m512)__builtin_ia32_shufps512_mask((__v16sf)(__m512)(M), \
-                                        (__v16sf)(__m512)(V), (int)(imm), \
-                                        (__v16sf)_mm512_undefined_ps(), \
-                                        (__mmask16)-1); })
-
-#define _mm512_mask_shuffle_ps(W, U, M, V, imm) __extension__ ({ \
-  (__m512)__builtin_ia32_shufps512_mask((__v16sf)(__m512)(M), \
-                                        (__v16sf)(__m512)(V), (int)(imm), \
-                                        (__v16sf)(__m512)(W), \
-                                        (__mmask16)(U)); })
+#define _mm512_shuffle_ps(A, B, M) __extension__ ({ \
+  (__m512)__builtin_shufflevector((__v16sf)(__m512)(A), \
+                                  (__v16sf)(__m512)(B), \
+                                  0 + (((M) >> 0) & 0x3), \
+                                  0 + (((M) >> 2) & 0x3), \
+                                  16 + (((M) >> 4) & 0x3), \
+                                  16 + (((M) >> 6) & 0x3), \
+                                  4 + (((M) >> 0) & 0x3), \
+                                  4 + (((M) >> 2) & 0x3), \
+                                  20 + (((M) >> 4) & 0x3), \
+                                  20 + (((M) >> 6) & 0x3), \
+                                  8 + (((M) >> 0) & 0x3), \
+                                  8 + (((M) >> 2) & 0x3), \
+                                  24 + (((M) >> 4) & 0x3), \
+                                  24 + (((M) >> 6) & 0x3), \
+                                  12 + (((M) >> 0) & 0x3), \
+                                  12 + (((M) >> 2) & 0x3), \
+                                  28 + (((M) >> 4) & 0x3), \
+                                  28 + (((M) >> 6) & 0x3)); })
+
+#define _mm512_mask_shuffle_ps(W, U, A, B, M) __extension__ ({ \
+  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+                                      (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
+                                      (__v16sf)(__m512)(W)); })
 
-#define _mm512_maskz_shuffle_ps(U, M, V, imm) __extension__ ({ \
-  (__m512)__builtin_ia32_shufps512_mask((__v16sf)(__m512)(M), \
-                                        (__v16sf)(__m512)(V), (int)(imm), \
-                                        (__v16sf)_mm512_setzero_ps(), \
-                                        (__mmask16)(U)); })
+#define _mm512_maskz_shuffle_ps(U, A, B, M) __extension__ ({ \
+  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+                                      (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
+                                      (__v16sf)_mm512_setzero_ps()); })
 
 #define _mm_sqrt_round_sd(A, B, R) __extension__ ({ \
   (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
diff --git a/test/CodeGen/avx512f-builtins.c b/test/CodeGen/avx512f-builtins.c
index b8121b2f75..e872f4ccbb 100644
--- a/test/CodeGen/avx512f-builtins.c
+++ b/test/CodeGen/avx512f-builtins.c
@@ -4234,19 +4234,21 @@ __m512d test_mm512_maskz_shuffle_pd(__mmask8 __U, __m512d __M, __m512d __V) {
 
 __m512 test_mm512_shuffle_ps(__m512 __M, __m512 __V) {
   // CHECK-LABEL: @test_mm512_shuffle_ps
-  // CHECK: @llvm.x86.avx512.mask.shuf.ps.512
+  // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 16, i32 16, i32 4, i32 5, i32 20, i32 20, i32 8, i32 9, i32 24, i32 24, i32 12, i32 13, i32 28, i32 28>
   return _mm512_shuffle_ps(__M, __V, 4);
 }
 
 __m512 test_mm512_mask_shuffle_ps(__m512 __W, __mmask16 __U, __m512 __M, __m512 __V) {
   // CHECK-LABEL: @test_mm512_mask_shuffle_ps
-  // CHECK: @llvm.x86.avx512.mask.shuf.ps.512
+  // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 16, i32 16, i32 4, i32 5, i32 20, i32 20, i32 8, i32 9, i32 24, i32 24, i32 12, i32 13, i32 28, i32 28>
+  // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
   return _mm512_mask_shuffle_ps(__W, __U, __M, __V, 4);
 }
 
 __m512 test_mm512_maskz_shuffle_ps(__mmask16 __U, __m512 __M, __m512 __V) {
   // CHECK-LABEL: @test_mm512_maskz_shuffle_ps
-  // CHECK: @llvm.x86.avx512.mask.shuf.ps.512
+  // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 16, i32 16, i32 4, i32 5, i32 20, i32 20, i32 8, i32 9, i32 24, i32 24, i32 12, i32 13, i32 28, i32 28>
+  // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
   return _mm512_maskz_shuffle_ps(__U, __M, __V, 4);
 }
 