From: Craig Topper Date: Sat, 11 Jun 2016 12:50:19 +0000 (+0000) Subject: [AVX512] Implement masked and 512-bit pshufd intrinsics directly with __builtin_shuff... X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=6cf486c45019908064d7ba6771d17dd38358e665;p=clang [AVX512] Implement masked and 512-bit pshufd intrinsics directly with __builtin_shufflevector and __builtin_ia32_select. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@272467 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/clang/Basic/BuiltinsX86.def b/include/clang/Basic/BuiltinsX86.def index d2da5512a1..e8537ee22b 100644 --- a/include/clang/Basic/BuiltinsX86.def +++ b/include/clang/Basic/BuiltinsX86.def @@ -2177,9 +2177,6 @@ TARGET_BUILTIN(__builtin_ia32_movshdup128_mask, "V4fV4fV4fUc","","avx512vl") TARGET_BUILTIN(__builtin_ia32_movshdup256_mask, "V8fV8fV8fUc","","avx512vl") TARGET_BUILTIN(__builtin_ia32_movsldup128_mask, "V4fV4fV4fUc","","avx512vl") TARGET_BUILTIN(__builtin_ia32_movsldup256_mask, "V8fV8fV8fUc","","avx512vl") -TARGET_BUILTIN(__builtin_ia32_pshufd512_mask, "V16iV16iIiV16iUs","","avx512f") -TARGET_BUILTIN(__builtin_ia32_pshufd256_mask, "V8iV8iIiV8iUc","","avx512vl") -TARGET_BUILTIN(__builtin_ia32_pshufd128_mask, "V4iV4iIiV4iUc","","avx512vl") TARGET_BUILTIN(__builtin_ia32_expanddf512_mask, "V8dV8dV8dUc","","avx512f") TARGET_BUILTIN(__builtin_ia32_expanddi512_mask, "V8LLiV8LLiV8LLiUc","","avx512f") TARGET_BUILTIN(__builtin_ia32_expandloaddf512_mask, "V8dV8dC*V8dUc","","avx512f") diff --git a/lib/Headers/avx512fintrin.h b/lib/Headers/avx512fintrin.h index b1f0ebf50f..90b87b2586 100644 --- a/lib/Headers/avx512fintrin.h +++ b/lib/Headers/avx512fintrin.h @@ -9052,19 +9052,34 @@ _mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A) } #define _mm512_shuffle_epi32(A, I) __extension__ ({ \ - (__m512i)__builtin_ia32_pshufd512_mask((__v16si)(__m512i)(A), (int)(I), \ - (__v16si)_mm512_undefined_epi32(), \ - (__mmask16)-1); }) + (__m512i)__builtin_shufflevector((__v16si)(__m512i)(A), \ + (__v16si)_mm512_setzero_si512(), \ + 0 + (((I) & 0x03) >> 0), \ + 0 + (((I) & 0x0c) >> 2), \ + 0 + (((I) & 0x30) >> 4), \ + 0 + (((I) & 0xc0) >> 6), \ + 4 + (((I) & 0x03) >> 0), \ + 4 + (((I) & 0x0c) >> 2), \ + 4 + (((I) & 0x30) >> 4), \ + 4 + (((I) & 0xc0) >> 6), \ + 8 + (((I) & 0x03) >> 0), \ + 8 + (((I) & 0x0c) >> 2), \ + 8 + (((I) & 0x30) >> 4), \ + 8 + (((I) & 0xc0) >> 6), \ + 12 + (((I) & 0x03) >> 0), \ + 12 + (((I) & 0x0c) >> 2), \ + 12 + (((I) & 0x30) >> 4), \ + 12 + (((I) & 0xc0) >> 6)); }) #define _mm512_mask_shuffle_epi32(W, U, A, I) __extension__ ({ \ - (__m512i)__builtin_ia32_pshufd512_mask((__v16si)(__m512i)(A), (int)(I), \ - (__v16si)(__m512i)(W), \ - (__mmask16)(U)); }) + (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ + (__v16si)_mm512_shuffle_epi32((A), (I)), \ + (__v16si)(__m512i)(W)); }) #define _mm512_maskz_shuffle_epi32(U, A, I) __extension__ ({ \ - (__m512i)__builtin_ia32_pshufd512_mask((__v16si)(__m512i)(A), (int)(I), \ - (__v16si)_mm512_setzero_si512(), \ - (__mmask16)(U)); }) + (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ + (__v16si)_mm512_shuffle_epi32((A), (I)), \ + (__v16si)_mm512_setzero_si512()); }) static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A) diff --git a/lib/Headers/avx512vlintrin.h b/lib/Headers/avx512vlintrin.h index 62e5e863ae..830be8d159 100644 --- a/lib/Headers/avx512vlintrin.h +++ b/lib/Headers/avx512vlintrin.h @@ -9257,25 +9257,24 @@ _mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A) } #define _mm256_mask_shuffle_epi32(W, U, A, I) __extension__({\ - (__m256i)__builtin_ia32_pshufd256_mask((__v8si)(__m256i)(A), (int)(I), \ - (__v8si)(__m256i)(W), \ - (__mmask8)(U)); }) + (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ + (__v8si)_mm256_shuffle_epi32((A), (I)), \ + (__v8si)(__m256i)(W)); }) #define _mm256_maskz_shuffle_epi32(U, A, I) __extension__({\ - (__m256i)__builtin_ia32_pshufd256_mask((__v8si)(__m256i)(A), (int)(I), \ - (__v8si)_mm256_setzero_si256(), \ - (__mmask8)(U)); }) + (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ + (__v8si)_mm256_shuffle_epi32((A), (I)), \ + (__v8si)_mm256_setzero_si256()); }) #define _mm_mask_shuffle_epi32(W, U, A, I) __extension__({\ - (__m128i)__builtin_ia32_pshufd128_mask((__v4si)(__m128i)(A), (int)(I), \ - (__v4si)(__m128i)(W), \ - (__mmask8)(U)); }) + (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ + (__v4si)_mm_shuffle_epi32((A), (I)), \ + (__v4si)(__m128i)(W)); }) #define _mm_maskz_shuffle_epi32(U, A, I) __extension__({\ - (__m128i)__builtin_ia32_pshufd128_mask((__v4si)(__m128i)(A), (int)(I), \ - (__v4si)_mm_setzero_si128(), \ - (__mmask8)(U)); }) - + (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ + (__v4si)_mm_shuffle_epi32((A), (I)), \ + (__v4si)_mm_setzero_si128()); }) static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A) diff --git a/lib/Sema/SemaChecking.cpp b/lib/Sema/SemaChecking.cpp index 6f944e0d92..6bd910ed58 100644 --- a/lib/Sema/SemaChecking.cpp +++ b/lib/Sema/SemaChecking.cpp @@ -1566,9 +1566,6 @@ bool Sema::CheckX86BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { case X86::BI__builtin_ia32_fpclasspd512_mask: case X86::BI__builtin_ia32_fpclasssd_mask: case X86::BI__builtin_ia32_fpclassss_mask: - case X86::BI__builtin_ia32_pshufd512_mask: - case X86::BI__builtin_ia32_pshufd256_mask: - case X86::BI__builtin_ia32_pshufd128_mask: i = 1; l = 0; u = 255; break; case X86::BI__builtin_ia32_palignr: diff --git a/test/CodeGen/avx512f-builtins.c b/test/CodeGen/avx512f-builtins.c index 547edd0f64..01cbaf5d07 100644 --- a/test/CodeGen/avx512f-builtins.c +++ b/test/CodeGen/avx512f-builtins.c @@ -5956,19 +5956,21 @@ __m512 test_mm512_maskz_moveldup_ps(__mmask16 __U, __m512 __A) { __m512i test_mm512_shuffle_epi32(__m512i __A) { // CHECK-LABEL: @test_mm512_shuffle_epi32 - // CHECK: @llvm.x86.avx512.mask.pshuf.d.512 + // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> return _mm512_shuffle_epi32(__A, 1); } __m512i test_mm512_mask_shuffle_epi32(__m512i __W, __mmask16 __U, __m512i __A) { // CHECK-LABEL: @test_mm512_mask_shuffle_epi32 - // CHECK: @llvm.x86.avx512.mask.pshuf.d.512 + // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> + // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_shuffle_epi32(__W, __U, __A, 1); } __m512i test_mm512_maskz_shuffle_epi32(__mmask16 __U, __m512i __A) { // CHECK-LABEL: @test_mm512_maskz_shuffle_epi32 - // CHECK: @llvm.x86.avx512.mask.pshuf.d.512 + // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> + // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_shuffle_epi32(__U, __A, 1); } @@ -7433,4 +7435,4 @@ __m512d test_mm512_setzero_pd() // CHECK-LABEL: @test_mm512_setzero_pd // CHECK: zeroinitializer return _mm512_setzero_pd(); -} \ No newline at end of file +} diff --git a/test/CodeGen/avx512vl-builtins.c b/test/CodeGen/avx512vl-builtins.c index e6b13395f9..4555689748 100644 --- a/test/CodeGen/avx512vl-builtins.c +++ b/test/CodeGen/avx512vl-builtins.c @@ -6593,25 +6593,29 @@ __m256 test_mm256_maskz_moveldup_ps(__mmask8 __U, __m256 __A) { __m128i test_mm_mask_shuffle_epi32(__m128i __W, __mmask8 __U, __m128i __A) { // CHECK-LABEL: @test_mm_mask_shuffle_epi32 - // CHECK: @llvm.x86.avx512.mask.pshuf.d.128 + // CHECK: shufflevector <4 x i32> %2, <4 x i32> %4, <4 x i32> + // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_mask_shuffle_epi32(__W, __U, __A, 1); } __m128i test_mm_maskz_shuffle_epi32(__mmask8 __U, __m128i __A) { // CHECK-LABEL: @test_mm_maskz_shuffle_epi32 - // CHECK: @llvm.x86.avx512.mask.pshuf.d.128 + // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> + // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_maskz_shuffle_epi32(__U, __A, 2); } __m256i test_mm256_mask_shuffle_epi32(__m256i __W, __mmask8 __U, __m256i __A) { // CHECK-LABEL: @test_mm256_mask_shuffle_epi32 - // CHECK: @llvm.x86.avx512.mask.pshuf.d.256 + // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> + // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_mask_shuffle_epi32(__W, __U, __A, 2); } __m256i test_mm256_maskz_shuffle_epi32(__mmask8 __U, __m256i __A) { // CHECK-LABEL: @test_mm256_maskz_shuffle_epi32 - // CHECK: @llvm.x86.avx512.mask.pshuf.d.256 + // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> + // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_maskz_shuffle_epi32(__U, __A, 2); }