From f030cf9cb45f1cf60dbecb28f5f03e76ea3e83aa Mon Sep 17 00:00:00 2001 From: Asaf Badouh Date: Sun, 19 Jul 2015 08:47:31 +0000 Subject: [PATCH] [X86][AVX512BW] add clang intrinsics for pmulhrsw / pmulhuw / pmulhw also made minor fix in "test_mm512_maskz_permutex2var_epi16" Differential Revision: http://reviews.llvm.org/D11336 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@242635 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/clang/Basic/BuiltinsX86.def | 4 +- lib/Headers/avx512bwintrin.h | 90 +++++++++++++++++++++++++++++ test/CodeGen/avx512bw-builtins.c | 49 +++++++++++++++- 3 files changed, 141 insertions(+), 2 deletions(-) diff --git a/include/clang/Basic/BuiltinsX86.def b/include/clang/Basic/BuiltinsX86.def index 1cd8973cbd..6ef04c76d6 100644 --- a/include/clang/Basic/BuiltinsX86.def +++ b/include/clang/Basic/BuiltinsX86.def @@ -1217,5 +1217,7 @@ BUILTIN(__builtin_ia32_vpermt2varhi128_mask, "V8sV8sV8sV8sUc", "") BUILTIN(__builtin_ia32_vpermt2varhi128_maskz, "V8sV8sV8sV8sUc", "") BUILTIN(__builtin_ia32_vpermt2varhi256_mask, "V16sV16sV16sV16sUs", "") BUILTIN(__builtin_ia32_vpermt2varhi256_maskz, "V16sV16sV16sV16sUs", "") - +BUILTIN(__builtin_ia32_pmulhrsw512_mask, "V32sV32sV32sV32sUi", "") +BUILTIN(__builtin_ia32_pmulhuw512_mask, "V32sV32sV32sV32sUi", "") +BUILTIN(__builtin_ia32_pmulhw512_mask, "V32sV32sV32sV32sUi", "") #undef BUILTIN diff --git a/lib/Headers/avx512bwintrin.h b/lib/Headers/avx512bwintrin.h index b0d3462d4d..5e17cbb50e 100644 --- a/lib/Headers/avx512bwintrin.h +++ b/lib/Headers/avx512bwintrin.h @@ -1204,6 +1204,96 @@ _mm512_maskz_permutex2var_epi16 (__mmask32 __U, __m512i __A, (__mmask32) __U); } +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mulhrs_epi16 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pmulhrsw512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v32hi) + _mm512_setzero_hi (), + (__mmask32) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_mulhrs_epi16 (__m512i __W, __mmask32 __U, __m512i __A, + __m512i __B) +{ + return (__m512i) __builtin_ia32_pmulhrsw512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v32hi) __W, + (__mmask32) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_mulhrs_epi16 (__mmask32 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pmulhrsw512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v32hi) + _mm512_setzero_hi (), + (__mmask32) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mulhi_epi16 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pmulhw512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v32hi) + _mm512_setzero_hi (), + (__mmask32) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_mulhi_epi16 (__m512i __W, __mmask32 __U, __m512i __A, + __m512i __B) +{ + return (__m512i) __builtin_ia32_pmulhw512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v32hi) __W, + (__mmask32) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_mulhi_epi16 (__mmask32 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pmulhw512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v32hi) + _mm512_setzero_hi (), + (__mmask32) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mulhi_epu16 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pmulhuw512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v32hi) + _mm512_setzero_hi (), + (__mmask32) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_mulhi_epu16 (__m512i __W, __mmask32 __U, __m512i __A, + __m512i __B) +{ + return (__m512i) __builtin_ia32_pmulhuw512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v32hi) __W, + (__mmask32) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_mulhi_epu16 (__mmask32 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pmulhuw512_mask ((__v32hi) __A, + (__v32hi) __B, + (__v32hi) + _mm512_setzero_hi (), + (__mmask32) __U); +} + #define _mm512_cmp_epi8_mask(a, b, p) __extension__ ({ \ (__mmask16)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \ (__v64qi)(__m512i)(b), \ diff --git a/test/CodeGen/avx512bw-builtins.c b/test/CodeGen/avx512bw-builtins.c index 6cc02ef705..9c57b56ec8 100644 --- a/test/CodeGen/avx512bw-builtins.c +++ b/test/CodeGen/avx512bw-builtins.c @@ -830,6 +830,53 @@ __m512i test_mm512_mask_permutex2var_epi16(__m512i __A, __mmask32 __U, __m512i _ } __m512i test_mm512_maskz_permutex2var_epi16(__mmask32 __U, __m512i __A, __m512i __I, __m512i __B) { // CHECK-LABEL: @test_mm512_maskz_permutex2var_epi16 - // CHECK: @llvm.x86.avx512.mask.vpermt2var.hi.512 + // CHECK: @llvm.x86.avx512.maskz.vpermt2var.hi.512 return _mm512_maskz_permutex2var_epi16(__U,__A,__I,__B); } + +__m512i test_mm512_mulhrs_epi16(__m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_mulhrs_epi16 + // CHECK: @llvm.x86.avx512.mask.pmul.hr.sw.512 + return _mm512_mulhrs_epi16(__A,__B); +} +__m512i test_mm512_mask_mulhrs_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_mask_mulhrs_epi16 + // CHECK: @llvm.x86.avx512.mask.pmul.hr.sw.512 + return _mm512_mask_mulhrs_epi16(__W,__U,__A,__B); +} +__m512i test_mm512_maskz_mulhrs_epi16(__mmask32 __U, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_maskz_mulhrs_epi16 + // CHECK: @llvm.x86.avx512.mask.pmul.hr.sw.512 + return _mm512_maskz_mulhrs_epi16(__U,__A,__B); +} +__m512i test_mm512_mulhi_epi16(__m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_mulhi_epi16 + // CHECK: @llvm.x86.avx512.mask.pmulh.w.512 + return _mm512_mulhi_epi16(__A,__B); +} +__m512i test_mm512_mask_mulhi_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_mask_mulhi_epi16 + // CHECK: @llvm.x86.avx512.mask.pmulh.w.512 + return _mm512_mask_mulhi_epi16(__W,__U,__A,__B); +} +__m512i test_mm512_maskz_mulhi_epi16(__mmask32 __U, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_maskz_mulhi_epi16 + // CHECK: @llvm.x86.avx512.mask.pmulh.w.512 + return _mm512_maskz_mulhi_epi16(__U,__A,__B); +} +__m512i test_mm512_mulhi_epu16(__m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_mulhi_epu16 + // CHECK: @llvm.x86.avx512.mask.pmulhu.w.512 + return _mm512_mulhi_epu16(__A,__B); +} +__m512i test_mm512_mask_mulhi_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_mask_mulhi_epu16 + // CHECK: @llvm.x86.avx512.mask.pmulhu.w.512 + return _mm512_mask_mulhi_epu16(__W,__U,__A,__B); +} +__m512i test_mm512_maskz_mulhi_epu16(__mmask32 __U, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_maskz_mulhi_epu16 + // CHECK: @llvm.x86.avx512.mask.pmulhu.w.512 + return _mm512_maskz_mulhi_epu16(__U,__A,__B); +} + -- 2.40.0