From: Craig Topper
Date: Wed, 21 Dec 2011 08:17:40 +0000 (+0000)
Subject: Add AVX2 horizontal add/sub intrinsics.
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=318e460ada6e589bd864d9ecb86053cc6852cabf;p=clang

Add AVX2 horizontal add/sub intrinsics.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@147047 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/include/clang/Basic/BuiltinsX86.def b/include/clang/Basic/BuiltinsX86.def
index e72d86bb7f..6a77468314 100644
--- a/include/clang/Basic/BuiltinsX86.def
+++ b/include/clang/Basic/BuiltinsX86.def
@@ -494,5 +494,11 @@ BUILTIN(__builtin_ia32_pavgb256, "V32cV32cV32c", "")
 BUILTIN(__builtin_ia32_pavgw256, "V16sV16sV16s", "")
 BUILTIN(__builtin_ia32_pblendvb256, "V32cV32cV32cV32c", "")
 BUILTIN(__builtin_ia32_pblendw256, "V16sV16sV16sIi", "")
+BUILTIN(__builtin_ia32_phaddw256, "V16sV16sV16s", "")
+BUILTIN(__builtin_ia32_phaddd256, "V8iV8iV8i", "")
+BUILTIN(__builtin_ia32_phaddsw256, "V16sV16sV16s", "")
+BUILTIN(__builtin_ia32_phsubw256, "V16sV16sV16s", "")
+BUILTIN(__builtin_ia32_phsubd256, "V8iV8iV8i", "")
+BUILTIN(__builtin_ia32_phsubsw256, "V16sV16sV16s", "")
 
 #undef BUILTIN
diff --git a/lib/Headers/avx2intrin.h b/lib/Headers/avx2intrin.h
index 813c602527..38891670a0 100644
--- a/lib/Headers/avx2intrin.h
+++ b/lib/Headers/avx2intrin.h
@@ -207,6 +207,42 @@ _mm256_cmpgt_epi64(__m256i a, __m256i b)
   return (__m256i)((__v4di)a > (__v4di)b);
 }
 
+static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_hadd_epi16(__m256i a, __m256i b)
+{
+  return (__m256i)__builtin_ia32_phaddw256((__v16hi)a, (__v16hi)b);
+}
+
+static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_hadd_epi32(__m256i a, __m256i b)
+{
+  return (__m256i)__builtin_ia32_phaddd256((__v8si)a, (__v8si)b);
+}
+
+static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_hadds_epi16(__m256i a, __m256i b)
+{
+  return (__m256i)__builtin_ia32_phaddsw256((__v16hi)a, (__v16hi)b);
+}
+
+static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_hsub_epi16(__m256i a, __m256i b)
+{
+  return (__m256i)__builtin_ia32_phsubw256((__v16hi)a, (__v16hi)b);
+}
+
+static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_hsub_epi32(__m256i a, __m256i b)
+{
+  return (__m256i)__builtin_ia32_phsubd256((__v8si)a, (__v8si)b);
+}
+
+static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_hsubs_epi16(__m256i a, __m256i b)
+{
+  return (__m256i)__builtin_ia32_phsubsw256((__v16hi)a, (__v16hi)b);
+}
+
 static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
 _mm256_or_si256(__m256i a, __m256i b)
 {
diff --git a/test/CodeGen/avx2-builtins.c b/test/CodeGen/avx2-builtins.c
index aa13b3c268..cc252d7d55 100644
--- a/test/CodeGen/avx2-builtins.c
+++ b/test/CodeGen/avx2-builtins.c
@@ -215,3 +215,33 @@ __m256 test_mm256_cmpgt_epi64(__m256 a, __m256 b) {
   // CHECK: icmp sgt <4 x i64>
   return _mm256_cmpgt_epi64(a, b);
 }
+
+__m256 test_mm256_hadd_epi16(__m256 a, __m256 b) {
+  // CHECK: @llvm.x86.avx2.phadd.w
+  return _mm256_hadd_epi16(a, b);
+}
+
+__m256 test_mm256_hadd_epi32(__m256 a, __m256 b) {
+  // CHECK: @llvm.x86.avx2.phadd.d
+  return _mm256_hadd_epi32(a, b);
+}
+
+__m256 test_mm256_hadds_epi16(__m256 a, __m256 b) {
+  // CHECK: @llvm.x86.avx2.phadd.sw
+  return _mm256_hadds_epi16(a, b);
+}
+
+__m256 test_mm256_hsub_epi16(__m256 a, __m256 b) {
+  // CHECK: @llvm.x86.avx2.phsub.w
+  return _mm256_hsub_epi16(a, b);
+}
+
+__m256 test_mm256_hsub_epi32(__m256 a, __m256 b) {
+  // CHECK: @llvm.x86.avx2.phsub.d
+  return _mm256_hsub_epi32(a, b);
+}
+
+__m256 test_mm256_hsubs_epi16(__m256 a, __m256 b) {
+  // CHECK: @llvm.x86.avx2.phsub.sw
+  return _mm256_hsubs_epi16(a, b);
+}