From: Craig Topper Date: Sat, 29 Oct 2016 19:02:07 +0000 (+0000) Subject: [AVX-512] Remove masked 128/256-bit pmuludq/pmuldq builtins and replace them with... X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=ec6b6a8c3525b6de2ed732f1e94974ea4c9bfe7f;p=clang [AVX-512] Remove masked 128/256-bit pmuludq/pmuldq builtins and replace them with unmasked builtins and a select. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@285503 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/clang/Basic/BuiltinsX86.def b/include/clang/Basic/BuiltinsX86.def index ec28633afa..6dde84ed4d 100644 --- a/include/clang/Basic/BuiltinsX86.def +++ b/include/clang/Basic/BuiltinsX86.def @@ -1076,11 +1076,6 @@ TARGET_BUILTIN(__builtin_ia32_ucmpd512_mask, "UsV16iV16iIiUs", "", "avx512f") TARGET_BUILTIN(__builtin_ia32_ucmpq512_mask, "UcV8LLiV8LLiIiUc", "", "avx512f") TARGET_BUILTIN(__builtin_ia32_ucmpw512_mask, "UiV32sV32sIiUi", "", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_pmuldq256_mask, "V4LLiV8iV8iV4LLiUc", "", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmuldq128_mask, "V2LLiV4iV4iV2LLiUc", "", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmuludq256_mask, "V4LLiV8iV8iV4LLiUc", "", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmuludq128_mask, "V2LLiV4iV4iV2LLiUc", "", "avx512vl") - TARGET_BUILTIN(__builtin_ia32_pabsb512_mask, "V64cV64cV64cULLi", "", "avx512bw") TARGET_BUILTIN(__builtin_ia32_pabsw512_mask, "V32sV32sV32sUi", "", "avx512bw") TARGET_BUILTIN(__builtin_ia32_packssdw512_mask, "V32sV16iV16iV32sUi", "", "avx512bw") diff --git a/lib/Headers/avx512vlintrin.h b/lib/Headers/avx512vlintrin.h index 9e101469d0..3a8ccb4ab9 100644 --- a/lib/Headers/avx512vlintrin.h +++ b/lib/Headers/avx512vlintrin.h @@ -744,79 +744,67 @@ _mm_maskz_sub_epi64(__mmask8 __U, __m128i __A, __m128i __B) } static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_mask_mul_epi32 (__m256i __W, __mmask8 __M, __m256i __X, - __m256i __Y) +_mm256_mask_mul_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) { - return (__m256i) __builtin_ia32_pmuldq256_mask ((__v8si) __X, - (__v8si) __Y, - (__v4di) __W, __M); + return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, + (__v4di)_mm256_mul_epi32(__X, __Y), + (__v4di)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_maskz_mul_epi32 (__mmask8 __M, __m256i __X, __m256i __Y) +_mm256_maskz_mul_epi32(__mmask8 __M, __m256i __X, __m256i __Y) { - return (__m256i) __builtin_ia32_pmuldq256_mask ((__v8si) __X, - (__v8si) __Y, - (__v4di) - _mm256_setzero_si256 (), - __M); + return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, + (__v4di)_mm256_mul_epi32(__X, __Y), + (__v4di)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_mask_mul_epi32 (__m128i __W, __mmask8 __M, __m128i __X, - __m128i __Y) +_mm_mask_mul_epi32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) { - return (__m128i) __builtin_ia32_pmuldq128_mask ((__v4si) __X, - (__v4si) __Y, - (__v2di) __W, __M); + return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, + (__v2di)_mm_mul_epi32(__X, __Y), + (__v2di)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_maskz_mul_epi32 (__mmask8 __M, __m128i __X, __m128i __Y) +_mm_maskz_mul_epi32(__mmask8 __M, __m128i __X, __m128i __Y) { - return (__m128i) __builtin_ia32_pmuldq128_mask ((__v4si) __X, - (__v4si) __Y, - (__v2di) - _mm_setzero_si128 (), - __M); + return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, + (__v2di)_mm_mul_epi32(__X, __Y), + (__v2di)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_mask_mul_epu32 (__m256i __W, __mmask8 __M, __m256i __X, - __m256i __Y) +_mm256_mask_mul_epu32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) { - return (__m256i) __builtin_ia32_pmuludq256_mask ((__v8si) __X, - (__v8si) __Y, - (__v4di) __W, __M); + return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, + (__v4di)_mm256_mul_epu32(__X, __Y), + (__v4di)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_maskz_mul_epu32 (__mmask8 __M, __m256i __X, __m256i __Y) +_mm256_maskz_mul_epu32(__mmask8 __M, __m256i __X, __m256i __Y) { - return (__m256i) __builtin_ia32_pmuludq256_mask ((__v8si) __X, - (__v8si) __Y, - (__v4di) - _mm256_setzero_si256 (), - __M); + return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, + (__v4di)_mm256_mul_epu32(__X, __Y), + (__v4di)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_mask_mul_epu32 (__m128i __W, __mmask8 __M, __m128i __X, - __m128i __Y) +_mm_mask_mul_epu32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) { - return (__m128i) __builtin_ia32_pmuludq128_mask ((__v4si) __X, - (__v4si) __Y, - (__v2di) __W, __M); + return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, + (__v2di)_mm_mul_epu32(__X, __Y), + (__v2di)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_maskz_mul_epu32 (__mmask8 __M, __m128i __X, __m128i __Y) +_mm_maskz_mul_epu32(__mmask8 __M, __m128i __X, __m128i __Y) { - return (__m128i) __builtin_ia32_pmuludq128_mask ((__v4si) __X, - (__v4si) __Y, - (__v2di) - _mm_setzero_si128 (), - __M); + return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, + (__v2di)_mm_mul_epu32(__X, __Y), + (__v2di)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS diff --git a/test/CodeGen/avx512vl-builtins.c b/test/CodeGen/avx512vl-builtins.c index 9ebbc13829..bb92d2a21e 100644 --- a/test/CodeGen/avx512vl-builtins.c +++ b/test/CodeGen/avx512vl-builtins.c @@ -727,13 +727,15 @@ __m128i test_mm_maskz_sub_epi64 (__mmask8 __U, __m128i __A, __m128i __B) { __m256i test_mm256_mask_mul_epi32 (__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) { //CHECK-LABEL: @test_mm256_mask_mul_epi32 - //CHECK: @llvm.x86.avx512.mask.pmul.dq.256 + //CHECK: @llvm.x86.avx2.pmul.dq + //CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_mask_mul_epi32(__W, __M, __X, __Y); } __m256i test_mm256_maskz_mul_epi32 (__mmask8 __M, __m256i __X, __m256i __Y) { //CHECK-LABEL: @test_mm256_maskz_mul_epi32 - //CHECK: @llvm.x86.avx512.mask.pmul.dq.256 + //CHECK: @llvm.x86.avx2.pmul.dq + //CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_maskz_mul_epi32(__M, __X, __Y); } @@ -741,39 +743,45 @@ __m256i test_mm256_maskz_mul_epi32 (__mmask8 __M, __m256i __X, __m256i __Y) { __m128i test_mm_mask_mul_epi32 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) { //CHECK-LABEL: @test_mm_mask_mul_epi32 - //CHECK: @llvm.x86.avx512.mask.pmul.dq.128 + //CHECK: @llvm.x86.sse41.pmuldq + //CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_mask_mul_epi32(__W, __M, __X, __Y); } __m128i test_mm_maskz_mul_epi32 (__mmask8 __M, __m128i __X, __m128i __Y) { //CHECK-LABEL: @test_mm_maskz_mul_epi32 - //CHECK: @llvm.x86.avx512.mask.pmul.dq.128 + //CHECK: @llvm.x86.sse41.pmuldq + //CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_maskz_mul_epi32(__M, __X, __Y); } __m256i test_mm256_mask_mul_epu32 (__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) { //CHECK-LABEL: @test_mm256_mask_mul_epu32 - //CHECK: @llvm.x86.avx512.mask.pmulu.dq.256 + //CHECK: @llvm.x86.avx2.pmulu.dq + //CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_mask_mul_epu32(__W, __M, __X, __Y); } __m256i test_mm256_maskz_mul_epu32 (__mmask8 __M, __m256i __X, __m256i __Y) { //CHECK-LABEL: @test_mm256_maskz_mul_epu32 - //CHECK: @llvm.x86.avx512.mask.pmulu.dq.256 + //CHECK: @llvm.x86.avx2.pmulu.dq + //CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_maskz_mul_epu32(__M, __X, __Y); } __m128i test_mm_mask_mul_epu32 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) { //CHECK-LABEL: @test_mm_mask_mul_epu32 - //CHECK: @llvm.x86.avx512.mask.pmulu.dq.128 + //CHECK: @llvm.x86.sse2.pmulu.dq + //CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_mask_mul_epu32(__W, __M, __X, __Y); } __m128i test_mm_maskz_mul_epu32 (__mmask8 __M, __m128i __X, __m128i __Y) { //CHECK-LABEL: @test_mm_maskz_mul_epu32 - //CHECK: @llvm.x86.avx512.mask.pmulu.dq.128 + //CHECK: @llvm.x86.sse2.pmulu.dq + //CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_maskz_mul_epu32(__M, __X, __Y); }