From: Craig Topper
Date: Sun, 10 Jun 2012 02:46:15 +0000 (+0000)
Subject: More XOP intrinsics
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=18b73eacc21c8a7c03168aa30c401d072cc6ffc9;p=clang

More XOP intrinsics

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@158287 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/include/clang/Basic/BuiltinsX86.def b/include/clang/Basic/BuiltinsX86.def
index c9afee6332..35739e9649 100644
--- a/include/clang/Basic/BuiltinsX86.def
+++ b/include/clang/Basic/BuiltinsX86.def
@@ -655,4 +655,25 @@ BUILTIN(__builtin_ia32_vpmacsdqh, "V2LLiV4iV4iV2LLi", "")
 BUILTIN(__builtin_ia32_vpmadcsswd, "V4iV8sV8sV4i", "")
 BUILTIN(__builtin_ia32_vpmadcswd, "V4iV8sV8sV4i", "")
 
+BUILTIN(__builtin_ia32_vphaddbw, "V8sV16c", "") // type string lists the result first: V8s result, one V16c operand (encoding per Builtins.def)
+BUILTIN(__builtin_ia32_vphaddbd, "V4iV16c", "")
+BUILTIN(__builtin_ia32_vphaddbq, "V2LLiV16c", "")
+BUILTIN(__builtin_ia32_vphaddwd, "V4iV8s", "")
+BUILTIN(__builtin_ia32_vphaddwq, "V2LLiV8s", "")
+BUILTIN(__builtin_ia32_vphadddq, "V2LLiV4i", "")
+BUILTIN(__builtin_ia32_vphaddubw, "V8sV16c", "") // the six "u"-named rows reuse the type strings of the six rows above, in the same order
+BUILTIN(__builtin_ia32_vphaddubd, "V4iV16c", "")
+BUILTIN(__builtin_ia32_vphaddubq, "V2LLiV16c", "")
+BUILTIN(__builtin_ia32_vphadduwd, "V4iV8s", "")
+BUILTIN(__builtin_ia32_vphadduwq, "V2LLiV8s", "")
+BUILTIN(__builtin_ia32_vphaddudq, "V2LLiV4i", "")
+BUILTIN(__builtin_ia32_vphsubbw, "V8sV16c", "") // vphsub* rows: same type strings as vphaddbw / vphaddwd / vphadddq respectively
+BUILTIN(__builtin_ia32_vphsubwd, "V4iV8s", "")
+BUILTIN(__builtin_ia32_vphsubdq, "V2LLiV4i", "")
+
+BUILTIN(__builtin_ia32_vpcmov, "V2LLiV2LLiV2LLiV2LLi", "") // three operands, each of the same type as the result
+BUILTIN(__builtin_ia32_vpcmov_256, "V4LLiV4LLiV4LLiV4LLi", "") // same shape as vpcmov with V4LLi in place of V2LLi
+
+BUILTIN(__builtin_ia32_vpperm, "V16cV16cV16cV16c", "") // three V16c operands, V16c result
+
 #undef BUILTIN
diff --git a/lib/Headers/xopintrin.h b/lib/Headers/xopintrin.h
index 7425611e99..099fca995f 100644
--- a/lib/Headers/xopintrin.h
+++ b/lib/Headers/xopintrin.h
@@ -106,6 +106,114 @@ _mm_maddd_epi16(__m128i __A, __m128i __B, __m128i __C)
   return (__m128i)__builtin_ia32_vpmadcswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
 }
 
+static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) /* XOP wrappers added by this hunk: each forwards to one __builtin_ia32_* and casts the result to its own return type */
+_mm_haddw_epi8(__m128i __A) /* __A viewed as __v16qi -> vphaddbw builtin (typed "V8sV16c" in BuiltinsX86.def) */
+{
+  return (__m128i)__builtin_ia32_vphaddbw((__v16qi)__A);
+}
+
+static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_haddd_epi8(__m128i __A) /* __A viewed as __v16qi -> vphaddbd builtin (typed "V4iV16c" in BuiltinsX86.def) */
+{
+  return (__m128i)__builtin_ia32_vphaddbd((__v16qi)__A);
+}
+
+static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_haddq_epi8(__m128i __A) /* __A viewed as __v16qi -> vphaddbq builtin (typed "V2LLiV16c" in BuiltinsX86.def) */
+{
+  return (__m128i)__builtin_ia32_vphaddbq((__v16qi)__A);
+}
+
+static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_haddd_epi16(__m128i __A) /* __A viewed as __v8hi -> vphaddwd builtin (typed "V4iV8s" in BuiltinsX86.def) */
+{
+  return (__m128i)__builtin_ia32_vphaddwd((__v8hi)__A);
+}
+
+static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_haddq_epi16(__m128i __A) /* __A viewed as __v8hi -> vphaddwq builtin (typed "V2LLiV8s" in BuiltinsX86.def) */
+{
+  return (__m128i)__builtin_ia32_vphaddwq((__v8hi)__A);
+}
+
+static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_haddq_epi32(__m128i __A) /* __A viewed as __v4si -> vphadddq builtin (typed "V2LLiV4i" in BuiltinsX86.def) */
+{
+  return (__m128i)__builtin_ia32_vphadddq((__v4si)__A);
+}
+
+static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_haddw_epu8(__m128i __A) /* __A viewed as __v16qi -> vphaddubw builtin; the epu* wrappers use the same casts as their epi* counterparts */
+{
+  return (__m128i)__builtin_ia32_vphaddubw((__v16qi)__A);
+}
+
+static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_haddd_epu8(__m128i __A) /* __A viewed as __v16qi -> vphaddubd builtin (typed "V4iV16c" in BuiltinsX86.def) */
+{
+  return (__m128i)__builtin_ia32_vphaddubd((__v16qi)__A);
+}
+
+static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_haddq_epu8(__m128i __A) /* __A viewed as __v16qi -> vphaddubq builtin (typed "V2LLiV16c" in BuiltinsX86.def) */
+{
+  return (__m128i)__builtin_ia32_vphaddubq((__v16qi)__A);
+}
+
+static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_haddd_epu16(__m128i __A) /* __A viewed as __v8hi -> vphadduwd builtin (typed "V4iV8s" in BuiltinsX86.def) */
+{
+  return (__m128i)__builtin_ia32_vphadduwd((__v8hi)__A);
+}
+
+static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_haddq_epu16(__m128i __A) /* __A viewed as __v8hi -> vphadduwq builtin (typed "V2LLiV8s" in BuiltinsX86.def) */
+{
+  return (__m128i)__builtin_ia32_vphadduwq((__v8hi)__A);
+}
+
+static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_haddq_epu32(__m128i __A) /* __A viewed as __v4si -> vphaddudq builtin (typed "V2LLiV4i" in BuiltinsX86.def) */
+{
+  return (__m128i)__builtin_ia32_vphaddudq((__v4si)__A);
+}
+
+static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_hsubw_epi8(__m128i __A) /* __A viewed as __v16qi -> vphsubbw builtin (typed "V8sV16c" in BuiltinsX86.def) */
+{
+  return (__m128i)__builtin_ia32_vphsubbw((__v16qi)__A);
+}
+
+static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_hsubd_epi16(__m128i __A) /* __A viewed as __v8hi -> vphsubwd builtin (typed "V4iV8s" in BuiltinsX86.def) */
+{
+  return (__m128i)__builtin_ia32_vphsubwd((__v8hi)__A);
+}
+
+static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_hsubq_epi32(__m128i __A) /* __A viewed as __v4si -> vphsubdq builtin (typed "V2LLiV4i" in BuiltinsX86.def) */
+{
+  return (__m128i)__builtin_ia32_vphsubdq((__v4si)__A);
+}
+
+static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_cmov_si128(__m128i __A, __m128i __B, __m128i __C) /* operands are passed uncast, unlike the wrappers above; builtin is typed "V2LLiV2LLiV2LLiV2LLi" */
+{
+  return (__m128i)__builtin_ia32_vpcmov(__A, __B, __C);
+}
+
+static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_cmov_si256(__m256i __A, __m256i __B, __m256i __C) /* __m256i form: operands passed uncast to vpcmov_256, typed "V4LLiV4LLiV4LLiV4LLi" */
+{
+  return (__m256i)__builtin_ia32_vpcmov_256(__A, __B, __C);
+}
+
+static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C) /* all three operands viewed as __v16qi -> vpperm builtin (typed "V16cV16cV16cV16c") */
+{
+  return (__m128i)__builtin_ia32_vpperm((__v16qi)__A, (__v16qi)__B, (__v16qi)__C);
+}
+
 #endif /* __XOP__ */
 
 #endif /* __XOPINTRIN_H */
diff --git a/test/CodeGen/xop-builtins.c b/test/CodeGen/xop-builtins.c
index 9fe79e44ae..28708ab9cf 100644
--- a/test/CodeGen/xop-builtins.c
+++ b/test/CodeGen/xop-builtins.c
@@ -64,3 +64,93 @@ __m128i test_mm_maddd_epi16(__m128i a, __m128i b, __m128i c) {
   // CHECK: @llvm.x86.xop.vpmadcswd
   return _mm_maddd_epi16(a, b, c);
 }
+
+__m128i test_mm_haddw_epi8(__m128i a) { // pattern for every test below: call one new intrinsic; the marker line names the llvm.x86.xop.* symbol expected in the output (presumably matched by FileCheck; RUN line not visible in this hunk)
+  // CHECK: @llvm.x86.xop.vphaddbw
+  return _mm_haddw_epi8(a);
+}
+
+__m128i test_mm_haddd_epi8(__m128i a) {
+  // CHECK: @llvm.x86.xop.vphaddbd
+  return _mm_haddd_epi8(a);
+}
+
+__m128i test_mm_haddq_epi8(__m128i a) {
+  // CHECK: @llvm.x86.xop.vphaddbq
+  return _mm_haddq_epi8(a);
+}
+
+__m128i test_mm_haddd_epi16(__m128i a) {
+  // CHECK: @llvm.x86.xop.vphaddwd
+  return _mm_haddd_epi16(a);
+}
+
+__m128i test_mm_haddq_epi16(__m128i a) {
+  // CHECK: @llvm.x86.xop.vphaddwq
+  return _mm_haddq_epi16(a);
+}
+
+__m128i test_mm_haddq_epi32(__m128i a) {
+  // CHECK: @llvm.x86.xop.vphadddq
+  return _mm_haddq_epi32(a);
+}
+
+__m128i test_mm_haddw_epu8(__m128i a) {
+  // CHECK: @llvm.x86.xop.vphaddubw
+  return _mm_haddw_epu8(a);
+}
+
+__m128i test_mm_haddd_epu8(__m128i a) {
+  // CHECK: @llvm.x86.xop.vphaddubd
+  return _mm_haddd_epu8(a);
+}
+
+__m128i test_mm_haddq_epu8(__m128i a) {
+  // CHECK: @llvm.x86.xop.vphaddubq
+  return _mm_haddq_epu8(a);
+}
+
+__m128i test_mm_haddd_epu16(__m128i a) {
+  // CHECK: @llvm.x86.xop.vphadduwd
+  return _mm_haddd_epu16(a);
+}
+
+__m128i test_mm_haddq_epu16(__m128i a) {
+  // CHECK: @llvm.x86.xop.vphadduwq
+  return _mm_haddq_epu16(a);
+}
+
+__m128i test_mm_haddq_epu32(__m128i a) {
+  // CHECK: @llvm.x86.xop.vphaddudq
+  return _mm_haddq_epu32(a);
+}
+
+__m128i test_mm_hsubw_epi8(__m128i a) {
+  // CHECK: @llvm.x86.xop.vphsubbw
+  return _mm_hsubw_epi8(a);
+}
+
+__m128i test_mm_hsubd_epi16(__m128i a) {
+  // CHECK: @llvm.x86.xop.vphsubwd
+  return _mm_hsubd_epi16(a);
+}
+
+__m128i test_mm_hsubq_epi32(__m128i a) {
+  // CHECK: @llvm.x86.xop.vphsubdq
+  return _mm_hsubq_epi32(a);
+}
+
+__m128i test_mm_cmov_si128(__m128i a, __m128i b, __m128i c) { // note: the name expected here is a textual prefix of the one expected by test_mm256_cmov_si256
+  // CHECK: @llvm.x86.xop.vpcmov
+  return _mm_cmov_si128(a, b, c);
+}
+
+__m256i test_mm256_cmov_si256(__m256i a, __m256i b, __m256i c) { // only test in this hunk that takes and returns __m256i
+  // CHECK: @llvm.x86.xop.vpcmov.256
+  return _mm256_cmov_si256(a, b, c);
+}
+
+__m128i test_mm_perm_epi8(__m128i a, __m128i b, __m128i c) {
+  // CHECK: @llvm.x86.xop.vpperm
+  return _mm_perm_epi8(a, b, c);
+}