From: Eli Friedman Date: Wed, 22 Jul 2009 17:08:01 +0000 (+0000) Subject: Switch some functions from using x86 builtins to using vector X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=3a266f28ffb1a1710084a7eb86f140d445c77746;p=clang Switch some functions from using x86 builtins to using vector operations. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@76753 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Headers/emmintrin.h b/lib/Headers/emmintrin.h index 72710be6b7..9db6951292 100644 --- a/lib/Headers/emmintrin.h +++ b/lib/Headers/emmintrin.h @@ -35,7 +35,7 @@ typedef long long __m128i __attribute__((__vector_size__(16))); typedef int __v4si __attribute__((__vector_size__(16))); typedef short __v8hi __attribute__((__vector_size__(16))); -typedef char __v16qi __attribute__((__vector_size__(16))); +typedef signed char __v16qi __attribute__((__vector_size__(16))); static inline __m128d __attribute__((__always_inline__, __nodebug__)) _mm_add_sd(__m128d a, __m128d b) @@ -886,55 +886,55 @@ _mm_srl_epi64(__m128i a, __m128i count) static inline __m128i __attribute__((__always_inline__, __nodebug__)) _mm_cmpeq_epi8(__m128i a, __m128i b) { - return (__m128i)__builtin_ia32_pcmpeqb128((__v16qi)a, (__v16qi)b); + return (__m128i)((__v16qi)a == (__v16qi)b); } static inline __m128i __attribute__((__always_inline__, __nodebug__)) _mm_cmpeq_epi16(__m128i a, __m128i b) { - return (__m128i)__builtin_ia32_pcmpeqw128((__v8hi)a, (__v8hi)b); + return (__m128i)((__v8hi)a == (__v8hi)b); } static inline __m128i __attribute__((__always_inline__, __nodebug__)) _mm_cmpeq_epi32(__m128i a, __m128i b) { - return (__m128i)__builtin_ia32_pcmpeqd128((__v4si)a, (__v4si)b); + return (__m128i)((__v4si)a == (__v4si)b); } static inline __m128i __attribute__((__always_inline__, __nodebug__)) _mm_cmpgt_epi8(__m128i a, __m128i b) { - return (__m128i)__builtin_ia32_pcmpgtb128((__v16qi)a, (__v16qi)b); + return (__m128i)((__v16qi)a > (__v16qi)b); } static inline __m128i __attribute__((__always_inline__, __nodebug__)) _mm_cmpgt_epi16(__m128i a, __m128i b) { - return (__m128i)__builtin_ia32_pcmpgtw128((__v8hi)a, (__v8hi)b); + return (__m128i)((__v8hi)a > (__v8hi)b); } static inline __m128i __attribute__((__always_inline__, __nodebug__)) _mm_cmpgt_epi32(__m128i a, __m128i b) { - return (__m128i)__builtin_ia32_pcmpgtd128((__v4si)a, (__v4si)b); + return (__m128i)((__v4si)a > (__v4si)b); } static inline __m128i __attribute__((__always_inline__, __nodebug__)) _mm_cmplt_epi8(__m128i a, __m128i b) { - return (__m128i)__builtin_ia32_pcmpgtb128((__v16qi)b, (__v16qi)a); + return _mm_cmpgt_epi8(b,a); } static inline __m128i __attribute__((__always_inline__, __nodebug__)) _mm_cmplt_epi16(__m128i a, __m128i b) { - return (__m128i)__builtin_ia32_pcmpgtw128((__v8hi)b, (__v8hi)a); + return _mm_cmpgt_epi16(b,a); } static inline __m128i __attribute__((__always_inline__, __nodebug__)) _mm_cmplt_epi32(__m128i a, __m128i b) { - return (__m128i)__builtin_ia32_pcmpgtd128((__v4si)b, (__v4si)a); + return _mm_cmpgt_epi32(b,a); } #ifdef __x86_64__ diff --git a/lib/Headers/mmintrin.h b/lib/Headers/mmintrin.h index 8ea3c470ee..e3cbe48ce0 100644 --- a/lib/Headers/mmintrin.h +++ b/lib/Headers/mmintrin.h @@ -32,7 +32,7 @@ typedef long long __m64 __attribute__((__vector_size__(8))); typedef int __v2si __attribute__((__vector_size__(8))); typedef short __v4hi __attribute__((__vector_size__(8))); -typedef char __v8qi __attribute__((__vector_size__(8))); +typedef signed char __v8qi __attribute__((__vector_size__(8))); static inline void __attribute__((__always_inline__, __nodebug__)) _mm_empty(void) @@ -348,37 +348,37 @@ _mm_xor_si64(__m64 __m1, __m64 __m2) static inline __m64 __attribute__((__always_inline__, __nodebug__)) _mm_cmpeq_pi8(__m64 __m1, __m64 __m2) { - return (__m64)__builtin_ia32_pcmpeqb((__v8qi)__m1, (__v8qi)__m2); + return (__m64)((__v8qi)__m1 == (__v8qi)__m2); } static inline __m64 __attribute__((__always_inline__, __nodebug__)) _mm_cmpeq_pi16(__m64 __m1, __m64 __m2) { - return (__m64)__builtin_ia32_pcmpeqw((__v4hi)__m1, (__v4hi)__m2); + return (__m64)((__v4hi)__m1 == (__v4hi)__m2); } static inline __m64 __attribute__((__always_inline__, __nodebug__)) _mm_cmpeq_pi32(__m64 __m1, __m64 __m2) { - return (__m64)__builtin_ia32_pcmpeqd((__v2si)__m1, (__v2si)__m2); + return (__m64)((__v2si)__m1 == (__v2si)__m2); } static inline __m64 __attribute__((__always_inline__, __nodebug__)) _mm_cmpgt_pi8(__m64 __m1, __m64 __m2) { - return (__m64)__builtin_ia32_pcmpgtb((__v8qi)__m1, (__v8qi)__m2); + return (__m64)((__v8qi)__m1 > (__v8qi)__m2); } static inline __m64 __attribute__((__always_inline__, __nodebug__)) _mm_cmpgt_pi16(__m64 __m1, __m64 __m2) { - return (__m64)__builtin_ia32_pcmpgtw((__v4hi)__m1, (__v4hi)__m2); + return (__m64)((__v4hi)__m1 > (__v4hi)__m2); } static inline __m64 __attribute__((__always_inline__, __nodebug__)) _mm_cmpgt_pi32(__m64 __m1, __m64 __m2) { - return (__m64)__builtin_ia32_pcmpgtd((__v2si)__m1, (__v2si)__m2); + return (__m64)((__v2si)__m1 > (__v2si)__m2); } static inline __m64 __attribute__((__always_inline__, __nodebug__))