From 2b77bd95e57926e506e776ccc46b773180725a9d Mon Sep 17 00:00:00 2001
From: Asaf Badouh
Date: Thu, 23 Jul 2015 07:07:25 +0000
Subject: [PATCH] [X86][AVX512BW] add madd and maddubs intrinsics

Differential Revision: http://reviews.llvm.org/D11420

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@242986 91177308-0d34-0410-b5e6-96231b3b80d8
---
/*
 * Reviewer notes (placed after the "---" separator, so they are not part of
 * the commit message).
 *
 * include/clang/Basic/BuiltinsX86.def
 *   Registers two builtins immediately before "#undef BUILTIN":
 *     __builtin_ia32_pmaddubsw512_mask   type string "V32sV64cV64cV32sUi"
 *     __builtin_ia32_pmaddwd512_mask     type string "V16iV32sV32sV16iUs"
 *
 * lib/Headers/avx512bwintrin.h
 *   Adds six always-inline wrappers, three per builtin. Every wrapper
 *   forwards its two vector inputs plus a third vector and a mask:
 *     _mm512_maddubs_epi16 / _mm512_madd_epi16
 *         third argument is a zero vector (_mm512_setzero_hi () for
 *         maddubs, _mm512_setzero_si512 () for madd); mask is -1.
 *     _mm512_mask_maddubs_epi16 / _mm512_mask_madd_epi16
 *         third argument is the caller's __W; mask is the caller's __U.
 *     _mm512_maskz_maddubs_epi16 / _mm512_maskz_madd_epi16
 *         third argument is a zero vector; mask is the caller's __U.
 *   The maddubs wrappers cast inputs to __v64qi, the third argument to
 *   __v32hi and the mask to __mmask32. The madd wrappers cast inputs to
 *   __v32hi, the third argument to __v16si and the mask to __mmask16.
 *   NOTE(review): the third argument is presumably the value kept in
 *   elements whose mask bit is clear -- confirm against the builtin's
 *   definition; this patch does not show it.
 *
 * test/CodeGen/avx512bw-builtins.c
 *   Adds one test function per wrapper. Each has a CHECK-LABEL line for the
 *   test's own name and a single CHECK line for the intrinsic name
 *   (@llvm.x86.avx512.mask.pmaddubs.w.512 or
 *   @llvm.x86.avx512.mask.pmaddw.d.512). No CHECK line matches operands or
 *   mask values.
 */
 include/clang/Basic/BuiltinsX86.def |  3 ++
 lib/Headers/avx512bwintrin.h        | 54 +++++++++++++++++++++++++++++
 test/CodeGen/avx512bw-builtins.c    | 30 ++++++++++++++++
 3 files changed, 87 insertions(+)

diff --git a/include/clang/Basic/BuiltinsX86.def b/include/clang/Basic/BuiltinsX86.def
index bfd10da50f..84cd669a48 100644
--- a/include/clang/Basic/BuiltinsX86.def
+++ b/include/clang/Basic/BuiltinsX86.def
@@ -1228,4 +1228,7 @@ BUILTIN(__builtin_ia32_mulpd512_mask, "V8dV8dV8dV8dUcIi", "")
 BUILTIN(__builtin_ia32_mulps512_mask, "V16fV16fV16fV16fUsIi", "")
 BUILTIN(__builtin_ia32_subpd512_mask, "V8dV8dV8dV8dUcIi", "")
 BUILTIN(__builtin_ia32_subps512_mask, "V16fV16fV16fV16fUsIi", "")
+BUILTIN(__builtin_ia32_pmaddubsw512_mask, "V32sV64cV64cV32sUi", "")
+BUILTIN(__builtin_ia32_pmaddwd512_mask, "V16iV32sV32sV16iUs", "")
+
 #undef BUILTIN
diff --git a/lib/Headers/avx512bwintrin.h b/lib/Headers/avx512bwintrin.h
index 5e17cbb50e..ec2e163ffd 100644
--- a/lib/Headers/avx512bwintrin.h
+++ b/lib/Headers/avx512bwintrin.h
@@ -1294,6 +1294,60 @@ _mm512_maskz_mulhi_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
               (__mmask32) __U);
 }
 
+static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
+_mm512_maddubs_epi16 (__m512i __X, __m512i __Y) {
+  return (__m512i) __builtin_ia32_pmaddubsw512_mask ((__v64qi) __X,
+                 (__v64qi) __Y,
+                 (__v32hi)
+                 _mm512_setzero_hi (),
+                 (__mmask32) -1);
+}
+
+static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_maddubs_epi16 (__m512i __W, __mmask32 __U, __m512i __X,
+          __m512i __Y) {
+  return (__m512i) __builtin_ia32_pmaddubsw512_mask ((__v64qi) __X,
+                 (__v64qi) __Y,
+                 (__v32hi) __W,
+                 (__mmask32) __U);
+}
+
+static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
+_mm512_maskz_maddubs_epi16 (__mmask32 __U, __m512i __X, __m512i __Y) {
+  return (__m512i) __builtin_ia32_pmaddubsw512_mask ((__v64qi) __X,
+                 (__v64qi) __Y,
+                 (__v32hi)
+                 _mm512_setzero_hi (),
+                 (__mmask32) __U);
+}
+
+static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
+_mm512_madd_epi16 (__m512i __A, __m512i __B) {
+  return (__m512i) __builtin_ia32_pmaddwd512_mask ((__v32hi) __A,
+               (__v32hi) __B,
+               (__v16si)
+               _mm512_setzero_si512 (),
+               (__mmask16) -1);
+}
+
+static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_madd_epi16 (__m512i __W, __mmask16 __U, __m512i __A,
+       __m512i __B) {
+  return (__m512i) __builtin_ia32_pmaddwd512_mask ((__v32hi) __A,
+               (__v32hi) __B,
+               (__v16si) __W,
+               (__mmask16) __U);
+}
+
+static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
+_mm512_maskz_madd_epi16 (__mmask16 __U, __m512i __A, __m512i __B) {
+  return (__m512i) __builtin_ia32_pmaddwd512_mask ((__v32hi) __A,
+               (__v32hi) __B,
+               (__v16si)
+               _mm512_setzero_si512 (),
+               (__mmask16) __U);
+}
+
 #define _mm512_cmp_epi8_mask(a, b, p) __extension__ ({ \
   (__mmask16)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \
                                          (__v64qi)(__m512i)(b), \
diff --git a/test/CodeGen/avx512bw-builtins.c b/test/CodeGen/avx512bw-builtins.c
index 9c57b56ec8..7109449636 100644
--- a/test/CodeGen/avx512bw-builtins.c
+++ b/test/CodeGen/avx512bw-builtins.c
@@ -880,3 +880,33 @@ __m512i test_mm512_maskz_mulhi_epu16(__mmask32 __U, __m512i __A, __m512i __B) {
   return _mm512_maskz_mulhi_epu16(__U,__A,__B);
 }
 
+__m512i test_mm512_maddubs_epi16(__m512i __X, __m512i __Y) {
+  // CHECK-LABEL: @test_mm512_maddubs_epi16
+  // CHECK: @llvm.x86.avx512.mask.pmaddubs.w.512
+  return _mm512_maddubs_epi16(__X,__Y);
+}
+__m512i test_mm512_mask_maddubs_epi16(__m512i __W, __mmask32 __U, __m512i __X, __m512i __Y) {
+  // CHECK-LABEL: @test_mm512_mask_maddubs_epi16
+  // CHECK: @llvm.x86.avx512.mask.pmaddubs.w.512
+  return _mm512_mask_maddubs_epi16(__W,__U,__X,__Y);
+}
+__m512i test_mm512_maskz_maddubs_epi16(__mmask32 __U, __m512i __X, __m512i __Y) {
+  // CHECK-LABEL: @test_mm512_maskz_maddubs_epi16
+  // CHECK: @llvm.x86.avx512.mask.pmaddubs.w.512
+  return _mm512_maskz_maddubs_epi16(__U,__X,__Y);
+}
+__m512i test_mm512_madd_epi16(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_madd_epi16
+  // CHECK: @llvm.x86.avx512.mask.pmaddw.d.512
+  return _mm512_madd_epi16(__A,__B);
+}
+__m512i test_mm512_mask_madd_epi16(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_madd_epi16
+  // CHECK: @llvm.x86.avx512.mask.pmaddw.d.512
+  return _mm512_mask_madd_epi16(__W,__U,__A,__B);
+}
+__m512i test_mm512_maskz_madd_epi16(__mmask16 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_madd_epi16
+  // CHECK: @llvm.x86.avx512.mask.pmaddw.d.512
+  return _mm512_maskz_madd_epi16(__U,__A,__B);
+}
-- 
2.40.0