From: Anders Carlsson
Date: Mon, 22 Dec 2008 07:08:03 +0000 (+0000)
Subject: Implement more of the remaining intrinsics; _mm_insert_pi16 is the last one left now.
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=a6431dc8d2ebf54de31323bae1fd197dc165f8b2;p=clang

Implement more of the remaining intrinsics; _mm_insert_pi16 is the last one left now.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@61330 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/include/clang/AST/X86Builtins.def b/include/clang/AST/X86Builtins.def
index 032adee75d..aa6f2598f9 100644
--- a/include/clang/AST/X86Builtins.def
+++ b/include/clang/AST/X86Builtins.def
@@ -293,12 +293,7 @@ BUILTIN(__builtin_ia32_storelps, "vV2i*V4f", "")
 BUILTIN(__builtin_ia32_movmskps, "iV4f", "")
 BUILTIN(__builtin_ia32_pmovmskb, "iV8c", "")
 BUILTIN(__builtin_ia32_movntps, "vf*V4f", "")
-// FIXME: the prototype for __builtin_ia32_movntq changed across different
-// versions of GCC. Until we can replace GCC's xmmintrin.h, this is hacked to
-// be a vararg builtin instead of taking V1LLi like it should. This loses some
-// type checking but makes us compatible with old version of GCC's xmmintrin.h
-// file.
-BUILTIN(__builtin_ia32_movntq, "vV1LLi*.", "")
+BUILTIN(__builtin_ia32_movntq, "vV1LLi*V1LLi", "")
 BUILTIN(__builtin_ia32_sfence, "v", "")
 BUILTIN(__builtin_ia32_psadbw, "V4sV8cV8c", "")
 BUILTIN(__builtin_ia32_rcpps, "V4fV4f", "")
diff --git a/lib/Headers/xmmintrin.devel.h b/lib/Headers/xmmintrin.devel.h
index 6f52c58291..ecb2f5b794 100644
--- a/lib/Headers/xmmintrin.devel.h
+++ b/lib/Headers/xmmintrin.devel.h
@@ -371,52 +371,9 @@ static inline __m128 __attribute__((__always_inline__)) _mm_cvtpi32_ps(__m128 a,
   return __builtin_ia32_cvtpi2ps(a, (__v2si)b);
 }
 
-static inline __m128 __attribute__((__always_inline__)) _mm_cvtpi16_ps(__m64 a)
-{
-  /* FIXME: Implement */
-  return (__m128){ 0, 0, 0, 0 };
-}
-
-static inline __m128 __attribute__((__always_inline__)) _mm_cvtpu16_ps(__m64 a)
-{
-  /* FIXME: Implement */
-  return (__m128){ 0, 0, 0, 0 };
-}
-
-static inline __m128 __attribute__((__always_inline__)) _mm_cvtpi8_ps(__m64 a)
-{
-  /* FIXME: Implement */
-  return (__m128){ 0, 0, 0, 0 };
-}
-
-static inline __m128 __attribute__((__always_inline__)) _mm_cvtpu8_ps(__m64 a)
-{
-  /* FIXME: Implement */
-  return (__m128){ 0, 0, 0, 0 };
-}
-
-static inline __m128 __attribute__((__always_inline__)) _mm_cvtpi32x2_ps(__m64 a, __m64 b)
-{
-  /* FIXME: Implement */
-  return (__m128){ 0, 0, 0, 0 };
-}
-
-static inline __m64 __attribute__((__always_inline__)) _mm_cvtps_pi16(__m128 a)
-{
-  /* FIXME: Implement */
-  return _mm_setzero_si64();
-}
-
-static inline __m64 __attribute__((__always_inline__)) _mm_cvtps_pi8(__m128 a)
-{
-  /* FIXME: Implement */
-  return _mm_setzero_si64();
-}
-
 static inline float __attribute__((__always_inline__)) _mm_cvtss_f32(__m128 a)
 {
-  /* FIXME: Implement */
-  return 0;
+  return a[0];
 }
 
 static inline __m128 __attribute__((__always_inline__)) _mm_loadh_pi(__m128 a, __m64 const *p)
@@ -651,6 +608,92 @@ static inline __m128 __attribute__((__always_inline__)) _mm_movelh_ps(__m128 a,
   return __builtin_shufflevector(a, b, 0, 1, 4, 5);
 }
 
+static inline __m128 __attribute__((__always_inline__)) _mm_cvtpi16_ps(__m64 a)
+{
+  __m64 b, c;
+  __m128 r;
+
+  b = _mm_setzero_si64();
+  b = _mm_cmpgt_pi16(b, a);
+  c = _mm_unpackhi_pi16(a, b);
+  r = _mm_setzero_ps();
+  r = _mm_cvtpi32_ps(r, c);
+  r = _mm_movelh_ps(r, r);
+  c = _mm_unpacklo_pi16(a, b);
+  r = _mm_cvtpi32_ps(r, c);
+
+  return r;
+}
+
+static inline __m128 __attribute__((__always_inline__)) _mm_cvtpu16_ps(__m64 a)
+{
+  __m64 b, c;
+  __m128 r;
+
+  b = _mm_setzero_si64();
+  c = _mm_unpackhi_pi16(a, b);
+  r = _mm_setzero_ps();
+  r = _mm_cvtpi32_ps(r, c);
+  r = _mm_movelh_ps(r, r);
+  c = _mm_unpacklo_pi16(a, b);
+  r = _mm_cvtpi32_ps(r, c);
+
+  return r;
+}
+
+static inline __m128 __attribute__((__always_inline__)) _mm_cvtpi8_ps(__m64 a)
+{
+  __m64 b;
+
+  b = _mm_setzero_si64();
+  b = _mm_cmpgt_pi8(b, a);
+  b = _mm_unpacklo_pi8(a, b);
+
+  return _mm_cvtpi16_ps(b);
+}
+
+static inline __m128 __attribute__((__always_inline__)) _mm_cvtpu8_ps(__m64 a)
+{
+  __m64 b;
+
+  b = _mm_setzero_si64();
+  b = _mm_unpacklo_pi8(a, b);
+
+  return _mm_cvtpi16_ps(b);
+}
+
+static inline __m128 __attribute__((__always_inline__)) _mm_cvtpi32x2_ps(__m64 a, __m64 b)
+{
+  __m128 c;
+
+  c = _mm_setzero_ps();
+  c = _mm_cvtpi32_ps(c, b);
+  c = _mm_movelh_ps(c, c);
+
+  return _mm_cvtpi32_ps(c, a);
+}
+
+static inline __m64 __attribute__((__always_inline__)) _mm_cvtps_pi16(__m128 a)
+{
+  __m64 b, c;
+
+  b = _mm_cvtps_pi32(a);
+  a = _mm_movehl_ps(a, a);
+  c = _mm_cvtps_pi32(a);
+
+  return _mm_packs_pi32(b, c);
+}
+
+static inline __m64 __attribute__((__always_inline__)) _mm_cvtps_pi8(__m128 a)
+{
+  __m64 b, c;
+
+  b = _mm_cvtps_pi16(a);
+  c = _mm_setzero_si64();
+
+  return _mm_packs_pi16(b, c);
+}
+
 static inline int __attribute__((__always_inline__)) _mm_movemask_ps(__m128 a)
 {
   return __builtin_ia32_movmskps(a);
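
A quick way to sanity-check the newly implemented conversion intrinsics is a small standalone program like the sketch below. It is illustrative only and not part of the patch; it assumes an x86 target with MMX and SSE enabled (for example, compiling with -msse2) and uses only intrinsics that already exist in <mmintrin.h>/<xmmintrin.h>.

#include <stdio.h>
#include <string.h>
#include <xmmintrin.h>   /* also pulls in mmintrin.h for the __m64 operations */

int main(void) {
  /* Four signed 16-bit lanes, widened to single-precision floats. */
  __m64  v = _mm_setr_pi16(-1, 2, -3, 4);
  __m128 f = _mm_cvtpi16_ps(v);

  float fl[4];
  _mm_storeu_ps(fl, f);

  /* Round-trip: convert the floats back to packed signed 16-bit integers. */
  __m64 r = _mm_cvtps_pi16(f);
  short s[4];
  memcpy(s, &r, sizeof(s));

  _mm_empty();  /* clear the MMX state before returning to normal FP code */

  printf("floats: %g %g %g %g\n", fl[0], fl[1], fl[2], fl[3]);
  printf("shorts: %d %d %d %d\n", s[0], s[1], s[2], s[3]);
  return 0;
}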