From: Craig Topper
Date: Wed, 11 Apr 2018 04:55:10 +0000 (+0000)
Subject: [X86] Replace 512-bit masked pmaddubsw and pmaddwd intrinsic with unmasked intrinsic...
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=f0445566e4bcb089937c3714a136d472079ea210;p=clang

[X86] Replace 512-bit masked pmaddubsw and pmaddwd intrinsic with unmasked intrinsic and a select.

This makes it consistent with the 128/256-bit functions. Someday maybe we'll have all the masking moved to selects.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@329775 91177308-0d34-0410-b5e6-96231b3b80d8
---

NOTE(review): the line structure of this patch was rebuilt from a
whitespace-collapsed copy. Hunk line counts agree with the @@ headers, but
indentation inside individual lines is approximated, so apply with
--ignore-whitespace. The mask casts in the new _mm512_mask_madd_epi16 and
_mm512_maskz_madd_epi16 bodies read "(__mask16)" in that copy; they are
written here as "(__mmask16)", the type of the __U parameter.

diff --git a/include/clang/Basic/BuiltinsX86.def b/include/clang/Basic/BuiltinsX86.def
index 44592d1c6c..6d06d5c2d0 100644
--- a/include/clang/Basic/BuiltinsX86.def
+++ b/include/clang/Basic/BuiltinsX86.def
@@ -1151,8 +1151,8 @@ TARGET_BUILTIN(__builtin_ia32_mulps512_mask, "V16fV16fV16fV16fUsIi", "", "avx512
 TARGET_BUILTIN(__builtin_ia32_subpd512_mask, "V8dV8dV8dV8dUcIi", "", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_subps512_mask, "V16fV16fV16fV16fUsIi", "", "avx512f")
 
-TARGET_BUILTIN(__builtin_ia32_pmaddubsw512_mask, "V32sV64cV64cV32sUi", "", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pmaddwd512_mask, "V16iV32sV32sV16iUs", "", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_pmaddubsw512, "V32sV64cV64c", "", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_pmaddwd512, "V16iV32sV32s", "", "avx512bw")
 
 TARGET_BUILTIN(__builtin_ia32_addss_round_mask, "V4fV4fV4fV4fUcIi", "", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_divss_round_mask, "V4fV4fV4fV4fUcIi", "", "avx512f")
diff --git a/lib/Headers/avx512bwintrin.h b/lib/Headers/avx512bwintrin.h
index 2aa7214403..73e1aad80b 100644
--- a/lib/Headers/avx512bwintrin.h
+++ b/lib/Headers/avx512bwintrin.h
@@ -1075,53 +1075,42 @@ _mm512_maskz_mulhi_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maddubs_epi16 (__m512i __X, __m512i __Y) {
-  return (__m512i) __builtin_ia32_pmaddubsw512_mask ((__v64qi) __X,
-                 (__v64qi) __Y,
-                 (__v32hi) _mm512_setzero_hi(),
-                 (__mmask32) -1);
+_mm512_maddubs_epi16(__m512i __X, __m512i __Y) {
+  return (__m512i)__builtin_ia32_pmaddubsw512((__v64qi)__X, (__v64qi)__Y);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_maddubs_epi16 (__m512i __W, __mmask32 __U, __m512i __X,
-          __m512i __Y) {
-  return (__m512i) __builtin_ia32_pmaddubsw512_mask ((__v64qi) __X,
-                 (__v64qi) __Y,
-                 (__v32hi) __W,
-                 (__mmask32) __U);
+_mm512_mask_maddubs_epi16(__m512i __W, __mmask32 __U, __m512i __X,
+                          __m512i __Y) {
+  return (__m512i)__builtin_ia32_selectw_512((__mmask32) __U,
+                                        (__v32hi)_mm512_maddubs_epi16(__X, __Y),
+                                        (__v32hi)__W);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_maddubs_epi16 (__mmask32 __U, __m512i __X, __m512i __Y) {
-  return (__m512i) __builtin_ia32_pmaddubsw512_mask ((__v64qi) __X,
-                 (__v64qi) __Y,
-                 (__v32hi) _mm512_setzero_hi(),
-                 (__mmask32) __U);
+_mm512_maskz_maddubs_epi16(__mmask32 __U, __m512i __X, __m512i __Y) {
+  return (__m512i)__builtin_ia32_selectw_512((__mmask32) __U,
+                                        (__v32hi)_mm512_maddubs_epi16(__X, __Y),
+                                        (__v32hi)_mm512_setzero_hi());
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_madd_epi16 (__m512i __A, __m512i __B) {
-  return (__m512i) __builtin_ia32_pmaddwd512_mask ((__v32hi) __A,
-                   (__v32hi) __B,
-                   (__v16si) _mm512_setzero_si512(),
-                   (__mmask16) -1);
+_mm512_madd_epi16(__m512i __A, __m512i __B) {
+  return (__m512i)__builtin_ia32_pmaddwd512((__v32hi)__A, (__v32hi)__B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_madd_epi16 (__m512i __W, __mmask16 __U, __m512i __A,
-       __m512i __B) {
-  return (__m512i) __builtin_ia32_pmaddwd512_mask ((__v32hi) __A,
-                   (__v32hi) __B,
-                   (__v16si) __W,
-                   (__mmask16) __U);
+_mm512_mask_madd_epi16(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) {
+  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
+                                        (__v16si)_mm512_madd_epi16(__A, __B),
+                                        (__v16si)__W);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_madd_epi16 (__mmask16 __U, __m512i __A, __m512i __B) {
-  return (__m512i) __builtin_ia32_pmaddwd512_mask ((__v32hi) __A,
-                   (__v32hi) __B,
-                   (__v16si) _mm512_setzero_si512(),
-                   (__mmask16) __U);
+_mm512_maskz_madd_epi16(__mmask16 __U, __m512i __A, __m512i __B) {
+  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
+                                        (__v16si)_mm512_madd_epi16(__A, __B),
+                                        (__v16si)_mm512_setzero_si512());
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
diff --git a/test/CodeGen/avx512bw-builtins.c b/test/CodeGen/avx512bw-builtins.c
index 3b601f4791..bb644c4423 100644
--- a/test/CodeGen/avx512bw-builtins.c
+++ b/test/CodeGen/avx512bw-builtins.c
@@ -1036,32 +1036,36 @@ __m512i test_mm512_maskz_mulhi_epu16(__mmask32 __U, __m512i __A, __m512i __B) {
 
 __m512i test_mm512_maddubs_epi16(__m512i __X, __m512i __Y) {
   // CHECK-LABEL: @test_mm512_maddubs_epi16
-  // CHECK: @llvm.x86.avx512.mask.pmaddubs.w.512
+  // CHECK: @llvm.x86.avx512.pmaddubs.w.512
   return _mm512_maddubs_epi16(__X,__Y);
 }
 __m512i test_mm512_mask_maddubs_epi16(__m512i __W, __mmask32 __U, __m512i __X, __m512i __Y) {
   // CHECK-LABEL: @test_mm512_mask_maddubs_epi16
-  // CHECK: @llvm.x86.avx512.mask.pmaddubs.w.512
+  // CHECK: @llvm.x86.avx512.pmaddubs.w.512
+  // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
   return _mm512_mask_maddubs_epi16(__W,__U,__X,__Y);
 }
 __m512i test_mm512_maskz_maddubs_epi16(__mmask32 __U, __m512i __X, __m512i __Y) {
   // CHECK-LABEL: @test_mm512_maskz_maddubs_epi16
-  // CHECK: @llvm.x86.avx512.mask.pmaddubs.w.512
+  // CHECK: @llvm.x86.avx512.pmaddubs.w.512
+  // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
   return _mm512_maskz_maddubs_epi16(__U,__X,__Y);
 }
 __m512i test_mm512_madd_epi16(__m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_madd_epi16
-  // CHECK: @llvm.x86.avx512.mask.pmaddw.d.512
+  // CHECK: @llvm.x86.avx512.pmaddw.d.512
   return _mm512_madd_epi16(__A,__B);
 }
 __m512i test_mm512_mask_madd_epi16(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_mask_madd_epi16
-  // CHECK: @llvm.x86.avx512.mask.pmaddw.d.512
+  // CHECK: @llvm.x86.avx512.pmaddw.d.512
+  // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
   return _mm512_mask_madd_epi16(__W,__U,__A,__B);
 }
 __m512i test_mm512_maskz_madd_epi16(__mmask16 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_maskz_madd_epi16
-  // CHECK: @llvm.x86.avx512.mask.pmaddw.d.512
+  // CHECK: @llvm.x86.avx512.pmaddw.d.512
+  // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
   return _mm512_maskz_madd_epi16(__U,__A,__B);
 }
 