BUILTIN(__builtin_ia32_vpmadcsswd, "V4iV8sV8sV4i", "")
BUILTIN(__builtin_ia32_vpmadcswd, "V4iV8sV8sV4i", "")
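+// XOP horizontal add/subtract, vector conditional move, and byte permute.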
+BUILTIN(__builtin_ia32_vphaddbw, "V8sV16c", "")
+BUILTIN(__builtin_ia32_vphaddbd, "V4iV16c", "")
+BUILTIN(__builtin_ia32_vphaddbq, "V2LLiV16c", "")
+BUILTIN(__builtin_ia32_vphaddwd, "V4iV8s", "")
+BUILTIN(__builtin_ia32_vphaddwq, "V2LLiV8s", "")
+BUILTIN(__builtin_ia32_vphadddq, "V2LLiV4i", "")
+BUILTIN(__builtin_ia32_vphaddubw, "V8sV16c", "")
+BUILTIN(__builtin_ia32_vphaddubd, "V4iV16c", "")
+BUILTIN(__builtin_ia32_vphaddubq, "V2LLiV16c", "")
+BUILTIN(__builtin_ia32_vphadduwd, "V4iV8s", "")
+BUILTIN(__builtin_ia32_vphadduwq, "V2LLiV8s", "")
+BUILTIN(__builtin_ia32_vphaddudq, "V2LLiV4i", "")
+BUILTIN(__builtin_ia32_vphsubbw, "V8sV16c", "")
+BUILTIN(__builtin_ia32_vphsubwd, "V4iV8s", "")
+BUILTIN(__builtin_ia32_vphsubdq, "V2LLiV4i", "")
+
+BUILTIN(__builtin_ia32_vpcmov, "V2LLiV2LLiV2LLiV2LLi", "")
+BUILTIN(__builtin_ia32_vpcmov_256, "V4LLiV4LLiV4LLiV4LLi", "")
+
+BUILTIN(__builtin_ia32_vpperm, "V16cV16cV16cV16c", "")
+
#undef BUILTIN
return (__m128i)__builtin_ia32_vpmadcswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
}
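+
+/* Horizontally adds each adjacent pair of signed bytes in __A, producing
+   eight sign-extended 16-bit sums (VPHADDBW). */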
+static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_haddw_epi8(__m128i __A)
+{
+ return (__m128i)__builtin_ia32_vphaddbw((__v16qi)__A);
+}
+
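+/* Horizontally adds the signed bytes within each 32-bit field of __A,
+   producing four sign-extended 32-bit sums (VPHADDBD). */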
+static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_haddd_epi8(__m128i __A)
+{
+ return (__m128i)__builtin_ia32_vphaddbd((__v16qi)__A);
+}
+
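+/* Horizontally adds the signed bytes within each 64-bit field of __A,
+   producing two sign-extended 64-bit sums (VPHADDBQ). */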
+static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_haddq_epi8(__m128i __A)
+{
+ return (__m128i)__builtin_ia32_vphaddbq((__v16qi)__A);
+}
+
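+/* Horizontally adds each adjacent pair of signed 16-bit integers in __A,
+   producing four sign-extended 32-bit sums (VPHADDWD). */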
+static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_haddd_epi16(__m128i __A)
+{
+ return (__m128i)__builtin_ia32_vphaddwd((__v8hi)__A);
+}
+
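+/* Horizontally adds the signed 16-bit integers within each 64-bit field of
+   __A, producing two sign-extended 64-bit sums (VPHADDWQ). */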
+static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_haddq_epi16(__m128i __A)
+{
+ return (__m128i)__builtin_ia32_vphaddwq((__v8hi)__A);
+}
+
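+/* Horizontally adds each adjacent pair of signed 32-bit integers in __A,
+   producing two sign-extended 64-bit sums (VPHADDDQ). */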
+static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_haddq_epi32(__m128i __A)
+{
+ return (__m128i)__builtin_ia32_vphadddq((__v4si)__A);
+}
+
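+/* Unsigned variant of _mm_haddw_epi8: the bytes are zero-extended before
+   being summed (VPHADDUBW). */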
+static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_haddw_epu8(__m128i __A)
+{
+ return (__m128i)__builtin_ia32_vphaddubw((__v16qi)__A);
+}
+
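+/* Unsigned variant of _mm_haddd_epi8: the bytes are zero-extended before
+   being summed (VPHADDUBD). */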
+static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_haddd_epu8(__m128i __A)
+{
+ return (__m128i)__builtin_ia32_vphaddubd((__v16qi)__A);
+}
+
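+/* Unsigned variant of _mm_haddq_epi8: the bytes are zero-extended before
+   being summed (VPHADDUBQ). */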
+static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_haddq_epu8(__m128i __A)
+{
+ return (__m128i)__builtin_ia32_vphaddubq((__v16qi)__A);
+}
+
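+/* Unsigned variant of _mm_haddd_epi16: the words are zero-extended before
+   being summed (VPHADDUWD). */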
+static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_haddd_epu16(__m128i __A)
+{
+ return (__m128i)__builtin_ia32_vphadduwd((__v8hi)__A);
+}
+
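+/* Unsigned variant of _mm_haddq_epi16: the words are zero-extended before
+   being summed (VPHADDUWQ). */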
+static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_haddq_epu16(__m128i __A)
+{
+ return (__m128i)__builtin_ia32_vphadduwq((__v8hi)__A);
+}
+
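+/* Unsigned variant of _mm_haddq_epi32: the doublewords are zero-extended
+   before being summed (VPHADDUDQ). */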
+static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_haddq_epu32(__m128i __A)
+{
+ return (__m128i)__builtin_ia32_vphaddudq((__v4si)__A);
+}
+
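+/* Within each 16-bit field of __A, subtracts the high signed byte from the
+   low signed byte, producing eight 16-bit differences (VPHSUBBW). */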
+static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_hsubw_epi8(__m128i __A)
+{
+ return (__m128i)__builtin_ia32_vphsubbw((__v16qi)__A);
+}
+
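+/* Within each 32-bit field of __A, subtracts the high signed word from the
+   low signed word, producing four 32-bit differences (VPHSUBWD). */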
+static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_hsubd_epi16(__m128i __A)
+{
+ return (__m128i)__builtin_ia32_vphsubwd((__v8hi)__A);
+}
+
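+/* Within each 64-bit field of __A, subtracts the high signed doubleword
+   from the low signed doubleword, producing two 64-bit differences
+   (VPHSUBDQ). */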
+static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_hsubq_epi32(__m128i __A)
+{
+ return (__m128i)__builtin_ia32_vphsubdq((__v4si)__A);
+}
+
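+/* Bit-wise conditional move: each result bit is taken from __A where the
+   corresponding bit of __C is set and from __B where it is clear, i.e.
+   (__A & __C) | (__B & ~__C)  (VPCMOV). */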
+static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_cmov_si128(__m128i __A, __m128i __B, __m128i __C)
+{
+ return (__m128i)__builtin_ia32_vpcmov(__A, __B, __C);
+}
+
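+/* 256-bit form of _mm_cmov_si128 (VPCMOV with YMM operands). */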
+static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_cmov_si256(__m256i __A, __m256i __B, __m256i __C)
+{
+ return (__m256i)__builtin_ia32_vpcmov_256(__A, __B, __C);
+}
+
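+/* Selects each result byte from the 32-byte concatenation of __A and __B
+   according to the low five bits of the corresponding selector byte in __C;
+   the selector's upper bits choose an optional transformation, such as
+   bitwise inversion, of the selected byte (VPPERM). */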
+static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C)
+{
+ return (__m128i)__builtin_ia32_vpperm((__v16qi)__A, (__v16qi)__B,
+                                        (__v16qi)__C);
+}
+
#endif /* __XOP__ */
#endif /* __XOPINTRIN_H */
// CHECK: @llvm.x86.xop.vpmadcswd
return _mm_maddd_epi16(a, b, c);
}
+
+__m128i test_mm_haddw_epi8(__m128i a) {
+ // CHECK: @llvm.x86.xop.vphaddbw
+ return _mm_haddw_epi8(a);
+}
+
+__m128i test_mm_haddd_epi8(__m128i a) {
+ // CHECK: @llvm.x86.xop.vphaddbd
+ return _mm_haddd_epi8(a);
+}
+
+__m128i test_mm_haddq_epi8(__m128i a) {
+ // CHECK: @llvm.x86.xop.vphaddbq
+ return _mm_haddq_epi8(a);
+}
+
+__m128i test_mm_haddd_epi16(__m128i a) {
+ // CHECK: @llvm.x86.xop.vphaddwd
+ return _mm_haddd_epi16(a);
+}
+
+__m128i test_mm_haddq_epi16(__m128i a) {
+ // CHECK: @llvm.x86.xop.vphaddwq
+ return _mm_haddq_epi16(a);
+}
+
+__m128i test_mm_haddq_epi32(__m128i a) {
+ // CHECK: @llvm.x86.xop.vphadddq
+ return _mm_haddq_epi32(a);
+}
+
+__m128i test_mm_haddw_epu8(__m128i a) {
+ // CHECK: @llvm.x86.xop.vphaddubw
+ return _mm_haddw_epu8(a);
+}
+
+__m128i test_mm_haddd_epu8(__m128i a) {
+ // CHECK: @llvm.x86.xop.vphaddubd
+ return _mm_haddd_epu8(a);
+}
+
+__m128i test_mm_haddq_epu8(__m128i a) {
+ // CHECK: @llvm.x86.xop.vphaddubq
+ return _mm_haddq_epu8(a);
+}
+
+__m128i test_mm_haddd_epu16(__m128i a) {
+ // CHECK: @llvm.x86.xop.vphadduwd
+ return _mm_haddd_epu16(a);
+}
+
+__m128i test_mm_haddq_epu16(__m128i a) {
+ // CHECK: @llvm.x86.xop.vphadduwq
+ return _mm_haddq_epu16(a);
+}
+
+__m128i test_mm_haddq_epu32(__m128i a) {
+ // CHECK: @llvm.x86.xop.vphaddudq
+ return _mm_haddq_epu32(a);
+}
+
+__m128i test_mm_hsubw_epi8(__m128i a) {
+ // CHECK: @llvm.x86.xop.vphsubbw
+ return _mm_hsubw_epi8(a);
+}
+
+__m128i test_mm_hsubd_epi16(__m128i a) {
+ // CHECK: @llvm.x86.xop.vphsubwd
+ return _mm_hsubd_epi16(a);
+}
+
+__m128i test_mm_hsubq_epi32(__m128i a) {
+ // CHECK: @llvm.x86.xop.vphsubdq
+ return _mm_hsubq_epi32(a);
+}
+
+__m128i test_mm_cmov_si128(__m128i a, __m128i b, __m128i c) {
+ // CHECK: @llvm.x86.xop.vpcmov
+ return _mm_cmov_si128(a, b, c);
+}
+
+__m256i test_mm256_cmov_si256(__m256i a, __m256i b, __m256i c) {
+ // CHECK: @llvm.x86.xop.vpcmov.256
+ return _mm256_cmov_si256(a, b, c);
+}
+
+__m128i test_mm_perm_epi8(__m128i a, __m128i b, __m128i c) {
+ // CHECK: @llvm.x86.xop.vpperm
+ return _mm_perm_epi8(a, b, c);
+}