From: Craig Topper Date: Sat, 24 Feb 2018 06:46:42 +0000 (+0000) Subject: [X86] Remove __builtin_ia32_permvarsf256_mask and __builtin_ia32_permvarsi256_mask... X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=8d120f1d0c2667d1aebb32d128ad0cbe4d6484ad;p=clang [X86] Remove __builtin_ia32_permvarsf256_mask and __builtin_ia32_permvarsi256_mask and use the avx2 unmasked versions and a select instead. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@326022 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/clang/Basic/BuiltinsX86.def b/include/clang/Basic/BuiltinsX86.def index 74c0c039c9..bb53628e91 100644 --- a/include/clang/Basic/BuiltinsX86.def +++ b/include/clang/Basic/BuiltinsX86.def @@ -1799,8 +1799,6 @@ TARGET_BUILTIN(__builtin_ia32_permvarhi128_mask, "V8sV8sV8sV8sUc","","avx512bw,a TARGET_BUILTIN(__builtin_ia32_permvarhi256_mask, "V16sV16sV16sV16sUs","","avx512bw,avx512vl") TARGET_BUILTIN(__builtin_ia32_permvardf256_mask, "V4dV4dV4LLiV4dUc","","avx512vl") TARGET_BUILTIN(__builtin_ia32_permvardi256_mask, "V4LLiV4LLiV4LLiV4LLiUc","","avx512vl") -TARGET_BUILTIN(__builtin_ia32_permvarsf256_mask, "V8fV8fV8iV8fUc","","avx512vl") -TARGET_BUILTIN(__builtin_ia32_permvarsi256_mask, "V8iV8iV8iV8iUc","","avx512vl") TARGET_BUILTIN(__builtin_ia32_fpclasspd128_mask, "UcV2dIiUc","","avx512dq,avx512vl") TARGET_BUILTIN(__builtin_ia32_fpclasspd256_mask, "UcV4dIiUc","","avx512dq,avx512vl") TARGET_BUILTIN(__builtin_ia32_fpclassps128_mask, "UcV4fIiUc","","avx512dq,avx512vl") diff --git a/lib/Headers/avx512vlintrin.h b/lib/Headers/avx512vlintrin.h index fb8056e3f8..e6d4cea56d 100644 --- a/lib/Headers/avx512vlintrin.h +++ b/lib/Headers/avx512vlintrin.h @@ -8178,60 +8178,41 @@ _mm256_mask_permutexvar_epi64 (__m256i __W, __mmask8 __M, __m256i __X, __M); } -static __inline__ __m256 __DEFAULT_FN_ATTRS -_mm256_mask_permutexvar_ps (__m256 __W, __mmask8 __U, __m256i __X, - __m256 __Y) -{ - return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y, - (__v8si) __X, - (__v8sf) __W, - (__mmask8) __U); -} +#define _mm256_permutexvar_ps(A, B) _mm256_permutevar8x32_ps((B), (A)) static __inline__ __m256 __DEFAULT_FN_ATTRS -_mm256_maskz_permutexvar_ps (__mmask8 __U, __m256i __X, __m256 __Y) +_mm256_mask_permutexvar_ps(__m256 __W, __mmask8 __U, __m256i __X, __m256 __Y) { - return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y, - (__v8si) __X, - (__v8sf) _mm256_setzero_ps (), - (__mmask8) __U); + return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, + (__v8sf)_mm256_permutexvar_ps(__X, __Y), + (__v8sf)__W); } static __inline__ __m256 __DEFAULT_FN_ATTRS -_mm256_permutexvar_ps (__m256i __X, __m256 __Y) +_mm256_maskz_permutexvar_ps(__mmask8 __U, __m256i __X, __m256 __Y) { - return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y, - (__v8si) __X, - (__v8sf) _mm256_undefined_si256 (), - (__mmask8) -1); + return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, + (__v8sf)_mm256_permutexvar_ps(__X, __Y), + (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_maskz_permutexvar_epi32 (__mmask8 __M, __m256i __X, __m256i __Y) -{ - return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y, - (__v8si) __X, - (__v8si) _mm256_setzero_si256 (), - __M); -} +#define _mm256_permutexvar_epi32(A, B) _mm256_permutevar8x32_epi32((B), (A)) static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_mask_permutexvar_epi32 (__m256i __W, __mmask8 __M, __m256i __X, - __m256i __Y) +_mm256_mask_permutexvar_epi32(__m256i __W, __mmask8 __M, __m256i __X, + __m256i __Y) { - return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y, - (__v8si) __X, - (__v8si) __W, - (__mmask8) __M); + return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, + (__v8si)_mm256_permutexvar_epi32(__X, __Y), + (__v8si)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_permutexvar_epi32 (__m256i __X, __m256i __Y) +_mm256_maskz_permutexvar_epi32(__mmask8 __M, __m256i __X, __m256i __Y) { - return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y, - (__v8si) __X, - (__v8si) _mm256_undefined_si256(), - (__mmask8) -1); + return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, + (__v8si)_mm256_permutexvar_epi32(__X, __Y), + (__v8si)_mm256_setzero_si256()); } #define _mm_alignr_epi32(A, B, imm) __extension__ ({ \ diff --git a/test/CodeGen/avx512vl-builtins.c b/test/CodeGen/avx512vl-builtins.c index 008eb09b8b..00a0813801 100644 --- a/test/CodeGen/avx512vl-builtins.c +++ b/test/CodeGen/avx512vl-builtins.c @@ -7019,37 +7019,40 @@ __m256i test_mm256_mask_permutexvar_epi64(__m256i __W, __mmask8 __M, __m256i __X __m256 test_mm256_mask_permutexvar_ps(__m256 __W, __mmask8 __U, __m256i __X, __m256 __Y) { // CHECK-LABEL: @test_mm256_mask_permutexvar_ps - // CHECK: @llvm.x86.avx512.mask.permvar.sf.256 + // CHECK: @llvm.x86.avx2.permps + // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} return _mm256_mask_permutexvar_ps(__W, __U, __X, __Y); } __m256 test_mm256_maskz_permutexvar_ps(__mmask8 __U, __m256i __X, __m256 __Y) { // CHECK-LABEL: @test_mm256_maskz_permutexvar_ps - // CHECK: @llvm.x86.avx512.mask.permvar.sf.256 + // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} return _mm256_maskz_permutexvar_ps(__U, __X, __Y); } __m256 test_mm256_permutexvar_ps(__m256i __X, __m256 __Y) { // CHECK-LABEL: @test_mm256_permutexvar_ps - // CHECK: @llvm.x86.avx512.mask.permvar.sf.256 + // CHECK: @llvm.x86.avx2.permps return _mm256_permutexvar_ps( __X, __Y); } __m256i test_mm256_maskz_permutexvar_epi32(__mmask8 __M, __m256i __X, __m256i __Y) { // CHECK-LABEL: @test_mm256_maskz_permutexvar_epi32 - // CHECK: @llvm.x86.avx512.mask.permvar.si.256 + // CHECK: @llvm.x86.avx2.permd + // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_maskz_permutexvar_epi32(__M, __X, __Y); } __m256i test_mm256_permutexvar_epi32(__m256i __X, __m256i __Y) { // CHECK-LABEL: @test_mm256_permutexvar_epi32 - // CHECK: @llvm.x86.avx512.mask.permvar.si.256 + // CHECK: @llvm.x86.avx2.permd return _mm256_permutexvar_epi32(__X, __Y); } __m256i test_mm256_mask_permutexvar_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) { // CHECK-LABEL: @test_mm256_mask_permutexvar_epi32 - // CHECK: @llvm.x86.avx512.mask.permvar.si.256 + // CHECK: @llvm.x86.avx2.permd + // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_mask_permutexvar_epi32(__W, __M, __X, __Y); }