From: Craig Topper Date: Sat, 2 Jul 2016 05:36:43 +0000 (+0000) Subject: [X86] Replace 128-bit and 256 masked vpermilps/vpermilpd builtins with native IR. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=0cf7afc057e83969413f10f3817eb107ac8a0933;p=clang [X86] Replace 128-bit and 256 masked vpermilps/vpermilpd builtins with native IR. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@274425 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/clang/Basic/BuiltinsX86.def b/include/clang/Basic/BuiltinsX86.def index 0ed9563b7f..e15462b86a 100644 --- a/include/clang/Basic/BuiltinsX86.def +++ b/include/clang/Basic/BuiltinsX86.def @@ -1800,10 +1800,6 @@ TARGET_BUILTIN(__builtin_ia32_vpermt2vard512_maskz, "V16iV16iV16iV16iUs","","avx TARGET_BUILTIN(__builtin_ia32_vpermt2varpd512_maskz, "V8dV8LLiV8dV8dUc","","avx512f") TARGET_BUILTIN(__builtin_ia32_vpermt2varps512_maskz, "V16fV16iV16fV16fUs","","avx512f") TARGET_BUILTIN(__builtin_ia32_vpermt2varq512_maskz, "V8LLiV8LLiV8LLiV8LLiUc","","avx512f") -TARGET_BUILTIN(__builtin_ia32_vpermilpd_mask, "V2dV2dIiV2dUc","","avx512vl") -TARGET_BUILTIN(__builtin_ia32_vpermilpd256_mask, "V4dV4dIiV4dUc","","avx512vl") -TARGET_BUILTIN(__builtin_ia32_vpermilps_mask, "V4fV4fIiV4fUc","","avx512vl") -TARGET_BUILTIN(__builtin_ia32_vpermilps256_mask, "V8fV8fIiV8fUc","","avx512vl") TARGET_BUILTIN(__builtin_ia32_vpermilvarpd_mask, "V2dV2dV2LLiV2dUc","","avx512vl") TARGET_BUILTIN(__builtin_ia32_vpermilvarpd256_mask, "V4dV4dV4LLiV4dUc","","avx512vl") TARGET_BUILTIN(__builtin_ia32_vpermilvarps_mask, "V4fV4fV4iV4fUc","","avx512vl") diff --git a/lib/Headers/avx512vlintrin.h b/lib/Headers/avx512vlintrin.h index 6bef81c096..e97ce7179c 100644 --- a/lib/Headers/avx512vlintrin.h +++ b/lib/Headers/avx512vlintrin.h @@ -6700,43 +6700,44 @@ _mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A) } #define _mm_mask_permute_pd(W, U, X, C) __extension__ ({ \ - (__m128d)__builtin_ia32_vpermilpd_mask((__v2df)(__m128d)(X), (int)(C), \ - 
(__v2df)(__m128d)(W), \ - (__mmask8)(U)); }) + (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ + (__v2df)_mm_permute_pd((X), (C)), \ + (__v2df)(__m128d)(W)); }) #define _mm_maskz_permute_pd(U, X, C) __extension__ ({ \ - (__m128d)__builtin_ia32_vpermilpd_mask((__v2df)(__m128d)(X), (int)(C), \ - (__v2df)_mm_setzero_pd(), \ - (__mmask8)(U)); }) + (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ + (__v2df)_mm_permute_pd((X), (C)), \ + (__v2df)_mm_setzero_pd()); }) #define _mm256_mask_permute_pd(W, U, X, C) __extension__ ({ \ - (__m256d)__builtin_ia32_vpermilpd256_mask((__v4df)(__m256d)(X), (int)(C), \ - (__v4df)(__m256d)(W), \ - (__mmask8)(U)); }) + (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ + (__v4df)_mm256_permute_pd((X), (C)), \ + (__v4df)(__m256d)(W)); }) #define _mm256_maskz_permute_pd(U, X, C) __extension__ ({ \ - (__m256d)__builtin_ia32_vpermilpd256_mask((__v4df)(__m256d)(X), (int)(C), \ - (__v4df)_mm256_setzero_pd(), \ - (__mmask8)(U)); }) + (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ + (__v4df)_mm256_permute_pd((X), (C)), \ + (__v4df)_mm256_setzero_pd()); }) #define _mm_mask_permute_ps(W, U, X, C) __extension__ ({ \ - (__m128)__builtin_ia32_vpermilps_mask((__v4sf)(__m128)(X), (int)(C), \ - (__v4sf)(__m128)(W), (__mmask8)(U)); }) + (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ + (__v4sf)_mm_permute_ps((X), (C)), \ + (__v4sf)(__m128)(W)); }) #define _mm_maskz_permute_ps(U, X, C) __extension__ ({ \ - (__m128)__builtin_ia32_vpermilps_mask((__v4sf)(__m128)(X), (int)(C), \ - (__v4sf)_mm_setzero_ps(), \ - (__mmask8)(U)); }) + (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ + (__v4sf)_mm_permute_ps((X), (C)), \ + (__v4sf)_mm_setzero_ps()); }) #define _mm256_mask_permute_ps(W, U, X, C) __extension__ ({ \ - (__m256)__builtin_ia32_vpermilps256_mask((__v8sf)(__m256)(X), (int)(C), \ - (__v8sf)(__m256)(W), \ - (__mmask8)(U)); }) + (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ + (__v8sf)_mm256_permute_ps((X), (C)), \ + 
(__v8sf)(__m256)(W)); }) #define _mm256_maskz_permute_ps(U, X, C) __extension__ ({ \ - (__m256)__builtin_ia32_vpermilps256_mask((__v8sf)(__m256)(X), (int)(C), \ - (__v8sf)_mm256_setzero_ps(), \ - (__mmask8)(U)); }) + (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ + (__v8sf)_mm256_permute_ps((X), (C)), \ + (__v8sf)_mm256_setzero_ps()); }) static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_permutevar_pd (__m128d __W, __mmask8 __U, __m128d __A, diff --git a/lib/Sema/SemaChecking.cpp b/lib/Sema/SemaChecking.cpp index 866c25d6ac..89e5e3e5af 100644 --- a/lib/Sema/SemaChecking.cpp +++ b/lib/Sema/SemaChecking.cpp @@ -1388,7 +1388,6 @@ bool Sema::CheckX86BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { case X86::BI_mm_prefetch: case X86::BI__builtin_ia32_extractf32x4_mask: case X86::BI__builtin_ia32_extracti32x4_mask: - case X86::BI__builtin_ia32_vpermilpd_mask: case X86::BI__builtin_ia32_extractf64x2_512_mask: case X86::BI__builtin_ia32_extracti64x2_512_mask: i = 1; l = 0; u = 3; @@ -1459,7 +1458,6 @@ bool Sema::CheckX86BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { case X86::BI__builtin_ia32_roundpd: case X86::BI__builtin_ia32_roundps256: case X86::BI__builtin_ia32_roundpd256: - case X86::BI__builtin_ia32_vpermilpd256_mask: i = 1; l = 0; u = 15; break; case X86::BI__builtin_ia32_roundss: @@ -1563,8 +1561,6 @@ bool Sema::CheckX86BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { case X86::BI__builtin_ia32_fpclasspd512_mask: case X86::BI__builtin_ia32_fpclasssd_mask: case X86::BI__builtin_ia32_fpclassss_mask: - case X86::BI__builtin_ia32_vpermilps_mask: - case X86::BI__builtin_ia32_vpermilps256_mask: case X86::BI__builtin_ia32_vpermilps512_mask: i = 1; l = 0; u = 255; break; diff --git a/test/CodeGen/avx512vl-builtins.c b/test/CodeGen/avx512vl-builtins.c index 340a9d0100..33fcae285e 100644 --- a/test/CodeGen/avx512vl-builtins.c +++ b/test/CodeGen/avx512vl-builtins.c @@ -4610,50 +4610,58 @@ __m256 
test_mm256_maskz_rcp14_ps(__mmask8 __U, __m256 __A) {
 
 __m128d test_mm_mask_permute_pd(__m128d __W, __mmask8 __U, __m128d __X) {
   // CHECK-LABEL: @test_mm_mask_permute_pd
-  // CHECK: @llvm.x86.avx512.mask.vpermil.pd
-  return _mm_mask_permute_pd(__W, __U, __X, 2);
+  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 1, i32 0>
+  // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
+  return _mm_mask_permute_pd(__W, __U, __X, 1);
 }
 
 __m128d test_mm_maskz_permute_pd(__mmask8 __U, __m128d __X) {
   // CHECK-LABEL: @test_mm_maskz_permute_pd
-  // CHECK: @llvm.x86.avx512.mask.vpermil.pd
-  return _mm_maskz_permute_pd(__U, __X, 2);
+  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 1, i32 0>
+  // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
+  return _mm_maskz_permute_pd(__U, __X, 1);
 }
 
 __m256d test_mm256_mask_permute_pd(__m256d __W, __mmask8 __U, __m256d __X) {
   // CHECK-LABEL: @test_mm256_mask_permute_pd
-  // CHECK: @llvm.x86.avx512.mask.vpermil.pd.256
-  return _mm256_mask_permute_pd(__W, __U, __X, 2);
+  // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+  // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
+  return _mm256_mask_permute_pd(__W, __U, __X, 5);
 }
 
 __m256d test_mm256_maskz_permute_pd(__mmask8 __U, __m256d __X) {
   // CHECK-LABEL: @test_mm256_maskz_permute_pd
-  // CHECK: @llvm.x86.avx512.mask.vpermil.pd.256
-  return _mm256_maskz_permute_pd(__U, __X, 2);
+  // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+  // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
+  return _mm256_maskz_permute_pd(__U, __X, 5);
 }
 
 __m128 test_mm_mask_permute_ps(__m128 __W, __mmask8 __U, __m128 __X) {
   // CHECK-LABEL: @test_mm_mask_permute_ps
-  // CHECK: @llvm.x86.avx512.mask.vpermil.ps
-  return _mm_mask_permute_ps(__W, __U, __X, 2);
+  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
+  return _mm_mask_permute_ps(__W, __U, __X, 0x1b);
 }
 
 __m128 test_mm_maskz_permute_ps(__mmask8 __U, __m128 __X) {
   // CHECK-LABEL: @test_mm_maskz_permute_ps
-  // CHECK: @llvm.x86.avx512.mask.vpermil.ps
-  return _mm_maskz_permute_ps(__U, __X, 2);
+  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
+  return _mm_maskz_permute_ps(__U, __X, 0x1b);
 }
 
 __m256 test_mm256_mask_permute_ps(__m256 __W, __mmask8 __U, __m256 __X) {
   // CHECK-LABEL: @test_mm256_mask_permute_ps
-  // CHECK: @llvm.x86.avx512.mask.vpermil.ps.256
-  return _mm256_mask_permute_ps(__W, __U, __X, 2);
+  // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+  // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
+  return _mm256_mask_permute_ps(__W, __U, __X, 0x1b);
 }
 
 __m256 test_mm256_maskz_permute_ps(__mmask8 __U, __m256 __X) {
   // CHECK-LABEL: @test_mm256_maskz_permute_ps
-  // CHECK: @llvm.x86.avx512.mask.vpermil.ps.256
-  return _mm256_maskz_permute_ps(__U, __X, 2);
+  // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+  // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
+  return _mm256_maskz_permute_ps(__U, __X, 0x1b);
 }
 
 __m128d test_mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C) {