From: Craig Topper
Date: Fri, 23 Dec 2011 08:31:16 +0000 (+0000)
Subject: Add AVX2 multiply intrinsics.
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=28a324a30b0677309a4c5d73ef5197398265e129;p=clang

Add AVX2 multiply intrinsics.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@147219 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/include/clang/Basic/BuiltinsX86.def b/include/clang/Basic/BuiltinsX86.def
index b9d5b4fa99..458c302a9a 100644
--- a/include/clang/Basic/BuiltinsX86.def
+++ b/include/clang/Basic/BuiltinsX86.def
@@ -527,5 +527,10 @@ BUILTIN(__builtin_ia32_pmovzxbq256, "V4LLiV16c", "")
 BUILTIN(__builtin_ia32_pmovzxwd256, "V8iV8s", "")
 BUILTIN(__builtin_ia32_pmovzxwq256, "V4LLiV8s", "")
 BUILTIN(__builtin_ia32_pmovzxdq256, "V4LLiV4i", "")
+BUILTIN(__builtin_ia32_pmuldq256, "V4LLiV8iV8i", "")
+BUILTIN(__builtin_ia32_pmulhrsw256, "V16sV16sV16s", "")
+BUILTIN(__builtin_ia32_pmulhuw256, "V16sV16sV16s", "")
+BUILTIN(__builtin_ia32_pmulhw256, "V16sV16sV16s", "")
+BUILTIN(__builtin_ia32_pmuludq256, "V4LLiV8iV8i", "")
 
 #undef BUILTIN
diff --git a/lib/Headers/avx2intrin.h b/lib/Headers/avx2intrin.h
index fe6137c47c..a1e9915633 100644
--- a/lib/Headers/avx2intrin.h
+++ b/lib/Headers/avx2intrin.h
@@ -405,6 +405,55 @@ _mm256_cvtepu32_epi64(__m128i __V)
   return (__m256i)__builtin_ia32_pmovzxdq256((__v4si)__V);
 }
 
+/* Wraps __builtin_ia32_pmuldq256: operands are passed as 8 x int, the builtin yields 4 x long long. */
+static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_mul_epi32(__m256i a, __m256i b)
+{
+  return (__m256i)__builtin_ia32_pmuldq256((__v8si)a, (__v8si)b);
+}
+
+/* Wraps __builtin_ia32_pmulhrsw256: operands and result are 16 x short. */
+static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_mulhrs_epi16(__m256i a, __m256i b)
+{
+  return (__m256i)__builtin_ia32_pmulhrsw256((__v16hi)a, (__v16hi)b);
+}
+
+/* Wraps __builtin_ia32_pmulhuw256: operands and result are 16 x short. */
+static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_mulhi_epu16(__m256i a, __m256i b)
+{
+  return (__m256i)__builtin_ia32_pmulhuw256((__v16hi)a, (__v16hi)b);
+}
+
+/* Wraps __builtin_ia32_pmulhw256: operands and result are 16 x short. */
+static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_mulhi_epi16(__m256i a, __m256i b)
+{
+  return (__m256i)__builtin_ia32_pmulhw256((__v16hi)a, (__v16hi)b);
+}
+
+/* No builtin needed: a plain vector multiply over sixteen 16-bit lanes. */
+static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_mullo_epi16(__m256i a, __m256i b)
+{
+  return (__m256i)((__v16hi)a * (__v16hi)b);
+}
+
+/* No builtin needed: a plain vector multiply over eight 32-bit lanes. */
+static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_mullo_epi32(__m256i a, __m256i b)
+{
+  return (__m256i)((__v8si)a * (__v8si)b);
+}
+
+/* Wraps __builtin_ia32_pmuludq256: operands are passed as 8 x int, the builtin yields 4 x long long. */
+static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_mul_epu32(__m256i a, __m256i b)
+{
+  return (__m256i)__builtin_ia32_pmuludq256((__v8si)a, (__v8si)b);
+}
+
 static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
 _mm256_or_si256(__m256i a, __m256i b)
 {
diff --git a/test/CodeGen/avx2-builtins.c b/test/CodeGen/avx2-builtins.c
index 9e8f471252..bc4a86e00d 100644
--- a/test/CodeGen/avx2-builtins.c
+++ b/test/CodeGen/avx2-builtins.c
@@ -380,3 +380,38 @@ __m256 test_mm256_cvtepu32_epi64(__m128 a) {
   // CHECK: @llvm.x86.avx2.pmovzxdq
   return _mm256_cvtepu32_epi64(a);
 }
+
+__m256i test_mm256_mul_epi32(__m256i a, __m256i b) {
+  // CHECK: @llvm.x86.avx2.pmul.dq
+  return _mm256_mul_epi32(a, b);
+}
+
+__m256i test_mm256_mulhrs_epi16(__m256i a, __m256i b) {
+  // CHECK: @llvm.x86.avx2.pmul.hr.sw
+  return _mm256_mulhrs_epi16(a, b);
+}
+
+__m256i test_mm256_mulhi_epu16(__m256i a, __m256i b) {
+  // CHECK: @llvm.x86.avx2.pmulhu.w
+  return _mm256_mulhi_epu16(a, b);
+}
+
+__m256i test_mm256_mulhi_epi16(__m256i a, __m256i b) {
+  // CHECK: @llvm.x86.avx2.pmulh.w
+  return _mm256_mulhi_epi16(a, b);
+}
+
+__m256i test_mm256_mullo_epi16(__m256i a, __m256i b) {
+  // CHECK: mul <16 x i16>
+  return _mm256_mullo_epi16(a, b);
+}
+
+__m256i test_mm256_mullo_epi32(__m256i a, __m256i b) {
+  // CHECK: mul <8 x i32>
+  return _mm256_mullo_epi32(a, b);
+}
+
+__m256i test_mm256_mul_epu32(__m256i a, __m256i b) {
+  // CHECK: @llvm.x86.avx2.pmulu.dq
+  return _mm256_mul_epu32(a, b);
+}