/* 16-byte integer vector types; the compare intrinsics in this file cast
 * their operands to these before comparing. */
typedef int __v4si __attribute__((__vector_size__(16)));
typedef short __v8hi __attribute__((__vector_size__(16)));
/* The (+) line spells the byte element type as `signed char`. Plain `char`
 * has implementation-defined signedness, and _mm_cmpgt_epi8 applies `>`
 * directly to __v16qi operands, so its ordering follows this element type. */
-typedef char __v16qi __attribute__((__vector_size__(16)));
+typedef signed char __v16qi __attribute__((__vector_size__(16)));
static inline __m128d __attribute__((__always_inline__, __nodebug__))
_mm_add_sd(__m128d a, __m128d b)
/* Element-wise equality of a and b, both reinterpreted as __v16qi; the
 * comparison result is cast back to __m128i.
 * The (-) line called __builtin_ia32_pcmpeqb128; the (+) line uses the
 * vector `==` operator on the same casts.
 * NOTE(review): equivalence relies on vector `==` producing the same per-lane
 * mask as the builtin -- confirm against the compiler's vector-extension docs. */
static inline __m128i __attribute__((__always_inline__, __nodebug__))
_mm_cmpeq_epi8(__m128i a, __m128i b)
{
- return (__m128i)__builtin_ia32_pcmpeqb128((__v16qi)a, (__v16qi)b);
+ return (__m128i)((__v16qi)a == (__v16qi)b);
}
/* Element-wise equality of a and b, both reinterpreted as __v8hi; the
 * comparison result is cast back to __m128i.
 * The (-) line called __builtin_ia32_pcmpeqw128; the (+) line uses the
 * vector `==` operator on the same casts.
 * NOTE(review): equivalence relies on vector `==` producing the same per-lane
 * mask as the builtin -- confirm against the compiler's vector-extension docs. */
static inline __m128i __attribute__((__always_inline__, __nodebug__))
_mm_cmpeq_epi16(__m128i a, __m128i b)
{
- return (__m128i)__builtin_ia32_pcmpeqw128((__v8hi)a, (__v8hi)b);
+ return (__m128i)((__v8hi)a == (__v8hi)b);
}
/* Element-wise equality of a and b, both reinterpreted as __v4si; the
 * comparison result is cast back to __m128i.
 * The (-) line called __builtin_ia32_pcmpeqd128; the (+) line uses the
 * vector `==` operator on the same casts.
 * NOTE(review): equivalence relies on vector `==` producing the same per-lane
 * mask as the builtin -- confirm against the compiler's vector-extension docs. */
static inline __m128i __attribute__((__always_inline__, __nodebug__))
_mm_cmpeq_epi32(__m128i a, __m128i b)
{
- return (__m128i)__builtin_ia32_pcmpeqd128((__v4si)a, (__v4si)b);
+ return (__m128i)((__v4si)a == (__v4si)b);
}
/* Element-wise `a > b` with both operands reinterpreted as __v16qi; the
 * comparison result is cast back to __m128i.
 * The (-) line called __builtin_ia32_pcmpgtb128; the (+) line uses the
 * vector `>` operator, so the ordering is whatever the __v16qi element type
 * gives (signed only if that typedef uses `signed char`).
 * NOTE(review): equivalence relies on vector `>` producing the same per-lane
 * mask as the builtin -- confirm against the compiler's vector-extension docs. */
static inline __m128i __attribute__((__always_inline__, __nodebug__))
_mm_cmpgt_epi8(__m128i a, __m128i b)
{
- return (__m128i)__builtin_ia32_pcmpgtb128((__v16qi)a, (__v16qi)b);
+ return (__m128i)((__v16qi)a > (__v16qi)b);
}
/* Element-wise `a > b` with both operands reinterpreted as __v8hi; the
 * comparison result is cast back to __m128i.
 * The (-) line called __builtin_ia32_pcmpgtw128; the (+) line uses the
 * vector `>` operator on the same casts.
 * NOTE(review): equivalence relies on vector `>` producing the same per-lane
 * mask as the builtin -- confirm against the compiler's vector-extension docs. */
static inline __m128i __attribute__((__always_inline__, __nodebug__))
_mm_cmpgt_epi16(__m128i a, __m128i b)
{
- return (__m128i)__builtin_ia32_pcmpgtw128((__v8hi)a, (__v8hi)b);
+ return (__m128i)((__v8hi)a > (__v8hi)b);
}
/* Element-wise `a > b` with both operands reinterpreted as __v4si; the
 * comparison result is cast back to __m128i.
 * The (-) line called __builtin_ia32_pcmpgtd128; the (+) line uses the
 * vector `>` operator on the same casts.
 * NOTE(review): equivalence relies on vector `>` producing the same per-lane
 * mask as the builtin -- confirm against the compiler's vector-extension docs. */
static inline __m128i __attribute__((__always_inline__, __nodebug__))
_mm_cmpgt_epi32(__m128i a, __m128i b)
{
- return (__m128i)__builtin_ia32_pcmpgtd128((__v4si)a, (__v4si)b);
+ return (__m128i)((__v4si)a > (__v4si)b);
}
/* Less-than expressed as greater-than with the operands swapped: `a < b` is
 * computed as `b > a`.
 * The (-) line called __builtin_ia32_pcmpgtb128 on (b, a) directly; the (+)
 * line forwards to _mm_cmpgt_epi8(b, a) instead. */
static inline __m128i __attribute__((__always_inline__, __nodebug__))
_mm_cmplt_epi8(__m128i a, __m128i b)
{
- return (__m128i)__builtin_ia32_pcmpgtb128((__v16qi)b, (__v16qi)a);
+ return _mm_cmpgt_epi8(b,a);
}
/* Less-than expressed as greater-than with the operands swapped: `a < b` is
 * computed as `b > a`.
 * The (-) line called __builtin_ia32_pcmpgtw128 on (b, a) directly; the (+)
 * line forwards to _mm_cmpgt_epi16(b, a) instead. */
static inline __m128i __attribute__((__always_inline__, __nodebug__))
_mm_cmplt_epi16(__m128i a, __m128i b)
{
- return (__m128i)__builtin_ia32_pcmpgtw128((__v8hi)b, (__v8hi)a);
+ return _mm_cmpgt_epi16(b,a);
}
/* Less-than expressed as greater-than with the operands swapped: `a < b` is
 * computed as `b > a`.
 * The (-) line called __builtin_ia32_pcmpgtd128 on (b, a) directly; the (+)
 * line forwards to _mm_cmpgt_epi32(b, a) instead. */
static inline __m128i __attribute__((__always_inline__, __nodebug__))
_mm_cmplt_epi32(__m128i a, __m128i b)
{
- return (__m128i)__builtin_ia32_pcmpgtd128((__v4si)b, (__v4si)a);
+ return _mm_cmpgt_epi32(b,a);
}
#ifdef __x86_64__
/* 8-byte integer vector types; the __m64 compare intrinsics in this file cast
 * their operands to these before comparing. */
typedef int __v2si __attribute__((__vector_size__(8)));
typedef short __v4hi __attribute__((__vector_size__(8)));
/* The (+) line spells the byte element type as `signed char`. Plain `char`
 * has implementation-defined signedness, and _mm_cmpgt_pi8 applies `>`
 * directly to __v8qi operands, so its ordering follows this element type. */
-typedef char __v8qi __attribute__((__vector_size__(8)));
+typedef signed char __v8qi __attribute__((__vector_size__(8)));
static inline void __attribute__((__always_inline__, __nodebug__))
_mm_empty(void)
/* Element-wise equality of __m1 and __m2, both reinterpreted as __v8qi; the
 * comparison result is cast back to __m64.
 * The (-) line called __builtin_ia32_pcmpeqb; the (+) line uses the vector
 * `==` operator on the same casts.
 * NOTE(review): equivalence relies on vector `==` producing the same per-lane
 * mask as the builtin -- confirm against the compiler's vector-extension docs. */
static inline __m64 __attribute__((__always_inline__, __nodebug__))
_mm_cmpeq_pi8(__m64 __m1, __m64 __m2)
{
- return (__m64)__builtin_ia32_pcmpeqb((__v8qi)__m1, (__v8qi)__m2);
+ return (__m64)((__v8qi)__m1 == (__v8qi)__m2);
}
/* Element-wise equality of __m1 and __m2, both reinterpreted as __v4hi; the
 * comparison result is cast back to __m64.
 * The (-) line called __builtin_ia32_pcmpeqw; the (+) line uses the vector
 * `==` operator on the same casts.
 * NOTE(review): equivalence relies on vector `==` producing the same per-lane
 * mask as the builtin -- confirm against the compiler's vector-extension docs. */
static inline __m64 __attribute__((__always_inline__, __nodebug__))
_mm_cmpeq_pi16(__m64 __m1, __m64 __m2)
{
- return (__m64)__builtin_ia32_pcmpeqw((__v4hi)__m1, (__v4hi)__m2);
+ return (__m64)((__v4hi)__m1 == (__v4hi)__m2);
}
/* Element-wise equality of __m1 and __m2, both reinterpreted as __v2si; the
 * comparison result is cast back to __m64.
 * The (-) line called __builtin_ia32_pcmpeqd; the (+) line uses the vector
 * `==` operator on the same casts.
 * NOTE(review): equivalence relies on vector `==` producing the same per-lane
 * mask as the builtin -- confirm against the compiler's vector-extension docs. */
static inline __m64 __attribute__((__always_inline__, __nodebug__))
_mm_cmpeq_pi32(__m64 __m1, __m64 __m2)
{
- return (__m64)__builtin_ia32_pcmpeqd((__v2si)__m1, (__v2si)__m2);
+ return (__m64)((__v2si)__m1 == (__v2si)__m2);
}
/* Element-wise `__m1 > __m2` with both operands reinterpreted as __v8qi; the
 * comparison result is cast back to __m64.
 * The (-) line called __builtin_ia32_pcmpgtb; the (+) line uses the vector
 * `>` operator, so the ordering is whatever the __v8qi element type gives
 * (signed only if that typedef uses `signed char`).
 * NOTE(review): equivalence relies on vector `>` producing the same per-lane
 * mask as the builtin -- confirm against the compiler's vector-extension docs. */
static inline __m64 __attribute__((__always_inline__, __nodebug__))
_mm_cmpgt_pi8(__m64 __m1, __m64 __m2)
{
- return (__m64)__builtin_ia32_pcmpgtb((__v8qi)__m1, (__v8qi)__m2);
+ return (__m64)((__v8qi)__m1 > (__v8qi)__m2);
}
/* Element-wise `__m1 > __m2` with both operands reinterpreted as __v4hi; the
 * comparison result is cast back to __m64.
 * The (-) line called __builtin_ia32_pcmpgtw; the (+) line uses the vector
 * `>` operator on the same casts.
 * NOTE(review): equivalence relies on vector `>` producing the same per-lane
 * mask as the builtin -- confirm against the compiler's vector-extension docs. */
static inline __m64 __attribute__((__always_inline__, __nodebug__))
_mm_cmpgt_pi16(__m64 __m1, __m64 __m2)
{
- return (__m64)__builtin_ia32_pcmpgtw((__v4hi)__m1, (__v4hi)__m2);
+ return (__m64)((__v4hi)__m1 > (__v4hi)__m2);
}
/* Element-wise `__m1 > __m2` with both operands reinterpreted as __v2si; the
 * comparison result is cast back to __m64.
 * The (-) line called __builtin_ia32_pcmpgtd; the (+) line uses the vector
 * `>` operator on the same casts.
 * NOTE(review): equivalence relies on vector `>` producing the same per-lane
 * mask as the builtin -- confirm against the compiler's vector-extension docs. */
static inline __m64 __attribute__((__always_inline__, __nodebug__))
_mm_cmpgt_pi32(__m64 __m1, __m64 __m2)
{
- return (__m64)__builtin_ia32_pcmpgtd((__v2si)__m1, (__v2si)__m2);
+ return (__m64)((__v2si)__m1 > (__v2si)__m2);
}
static inline __m64 __attribute__((__always_inline__, __nodebug__))