static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpgt_sd(__m128d __a, __m128d __b)
{
- return (__m128d)__builtin_ia32_cmpsd(__b, __a, 1);
+ __m128d __c = __builtin_ia32_cmpsd(__b, __a, 1); /* operands are passed swapped (__b, __a) with predicate immediate 1; presumably "less-than", i.e. __b < __a -- confirm against the CMPSD predicate encoding */
+ return (__m128d) { __c[0], __a[1] }; /* element 0 is the compare result, element 1 is copied from __a. NOTE(review): presumably needed because the swapped call would otherwise return __b's upper element -- confirm */
}
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpge_sd(__m128d __a, __m128d __b)
{
- return (__m128d)__builtin_ia32_cmpsd(__b, __a, 2);
+ __m128d __c = __builtin_ia32_cmpsd(__b, __a, 2); /* swapped operands (__b, __a) with predicate immediate 2; presumably "less-or-equal", i.e. __b <= __a -- confirm against the CMPSD predicate encoding */
+ return (__m128d) { __c[0], __a[1] }; /* rebuild the result: low element from the compare, high element from __a rather than from the builtin's output */
}
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpngt_sd(__m128d __a, __m128d __b)
{
- return (__m128d)__builtin_ia32_cmpsd(__b, __a, 5);
+ __m128d __c = __builtin_ia32_cmpsd(__b, __a, 5); /* swapped operands (__b, __a) with predicate immediate 5; presumably "not-less-than" -- confirm against the CMPSD predicate encoding */
+ return (__m128d) { __c[0], __a[1] }; /* low element is the compare result; the high element is taken from __a, not from __c */
}
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpnge_sd(__m128d __a, __m128d __b)
{
- return (__m128d)__builtin_ia32_cmpsd(__b, __a, 6);
+ __m128d __c = __builtin_ia32_cmpsd(__b, __a, 6); /* swapped operands (__b, __a) with predicate immediate 6; presumably "not-less-or-equal" -- confirm against the CMPSD predicate encoding */
+ return (__m128d) { __c[0], __a[1] }; /* combine element 0 of the compare with element 1 of __a so the caller's first argument supplies the high element */
}
static __inline__ int __attribute__((__always_inline__, __nodebug__))
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmpgt_ss(__m128 __a, __m128 __b)
{
- return (__m128)__builtin_ia32_cmpss(__b, __a, 1);
+ return (__m128)__builtin_shufflevector(__a, /* first shuffle source: __a, which supplies result elements 1, 2 and 3 */
+ __builtin_ia32_cmpss(__b, __a, 1), /* second shuffle source: compare with swapped operands (__b, __a) and predicate immediate 1 (presumably "less-than" -- confirm) */
+ 4, 1, 2, 3); /* index 4 selects element 0 of the second source, i.e. the compare result */
}
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmpge_ss(__m128 __a, __m128 __b)
{
- return (__m128)__builtin_ia32_cmpss(__b, __a, 2);
+ return (__m128)__builtin_shufflevector(__a, /* __a is the first shuffle source and provides elements 1..3 of the result */
+ __builtin_ia32_cmpss(__b, __a, 2), /* compare issued as (__b, __a) with predicate immediate 2 (presumably "less-or-equal" -- confirm) */
+ 4, 1, 2, 3); /* element 0 comes from the compare (index 4 = element 0 of the second source) */
}
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmpngt_ss(__m128 __a, __m128 __b)
{
- return (__m128)__builtin_ia32_cmpss(__b, __a, 5);
+ return (__m128)__builtin_shufflevector(__a, /* upper three elements of the result are read from __a */
+ __builtin_ia32_cmpss(__b, __a, 5), /* operands swapped (__b, __a), predicate immediate 5 (presumably "not-less-than" -- confirm) */
+ 4, 1, 2, 3); /* mask: element 0 of the compare result followed by elements 1, 2, 3 of __a */
}
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmpnge_ss(__m128 __a, __m128 __b)
{
- return (__m128)__builtin_ia32_cmpss(__b, __a, 6);
+ return (__m128)__builtin_shufflevector(__a, /* first source __a contributes elements 1, 2 and 3 */
+ __builtin_ia32_cmpss(__b, __a, 6), /* second source: swapped-operand compare (__b, __a) with predicate immediate 6 (presumably "not-less-or-equal" -- confirm) */
+ 4, 1, 2, 3); /* only element 0 is taken from the compare; the rest stay as in __a */
}
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
// CHECK: @llvm.x86.sse.cmp.ss({{.*}}, i8 13)
return _mm_cmp_ss(a, b, _CMP_GE_OS);
}
+
+__m128 test_cmpgt_ss(__m128 a, __m128 b) { // codegen test wrapper around _mm_cmpgt_ss
+ // CHECK: @llvm.x86.sse.cmp.ss({{.*}}, i8 1)
+ // CHECK: shufflevector <{{.*}}, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
+ return _mm_cmpgt_ss(a, b); // expected IR: the sse.cmp.ss intrinsic with immediate 1, then a shuffle with mask <4, 1, 2, 3>
+}
+
+__m128 test_cmpge_ss(__m128 a, __m128 b) { // codegen test wrapper around _mm_cmpge_ss
+ // CHECK: @llvm.x86.sse.cmp.ss({{.*}}, i8 2)
+ // CHECK: shufflevector <{{.*}}, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
+ return _mm_cmpge_ss(a, b); // expected IR: the sse.cmp.ss intrinsic with immediate 2, then a shuffle with mask <4, 1, 2, 3>
+}
+
+__m128 test_cmpngt_ss(__m128 a, __m128 b) { // codegen test wrapper around _mm_cmpngt_ss
+ // CHECK: @llvm.x86.sse.cmp.ss({{.*}}, i8 5)
+ // CHECK: shufflevector <{{.*}}, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
+ return _mm_cmpngt_ss(a, b); // expected IR: the sse.cmp.ss intrinsic with immediate 5, then a shuffle with mask <4, 1, 2, 3>
+}
+
+__m128 test_cmpnge_ss(__m128 a, __m128 b) { // codegen test wrapper around _mm_cmpnge_ss
+ // CHECK: @llvm.x86.sse.cmp.ss({{.*}}, i8 6)
+ // CHECK: shufflevector <{{.*}}, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
+ return _mm_cmpnge_ss(a, b); // expected IR: the sse.cmp.ss intrinsic with immediate 6, then a shuffle with mask <4, 1, 2, 3>
+}
+
+__m128d test_cmpgt_sd(__m128d a, __m128d b) { // codegen test wrapper around _mm_cmpgt_sd
+ // CHECK: @llvm.x86.sse2.cmp.sd({{.*}}, i8 1)
+ // CHECK: shufflevector <{{.*}}, <2 x i32> <i32 0, i32 3>
+ return _mm_cmpgt_sd(a, b); // expected IR: the sse2.cmp.sd intrinsic with immediate 1, then a two-element shuffle with mask <0, 3>
+}
+
+__m128d test_cmpge_sd(__m128d a, __m128d b) { // codegen test wrapper around _mm_cmpge_sd
+ // CHECK: @llvm.x86.sse2.cmp.sd({{.*}}, i8 2)
+ // CHECK: shufflevector <{{.*}}, <2 x i32> <i32 0, i32 3>
+ return _mm_cmpge_sd(a, b); // expected IR: the sse2.cmp.sd intrinsic with immediate 2, then a two-element shuffle with mask <0, 3>
+}
+
+__m128d test_cmpngt_sd(__m128d a, __m128d b) { // codegen test wrapper around _mm_cmpngt_sd
+ // CHECK: @llvm.x86.sse2.cmp.sd({{.*}}, i8 5)
+ // CHECK: shufflevector <{{.*}}, <2 x i32> <i32 0, i32 3>
+ return _mm_cmpngt_sd(a, b); // expected IR: the sse2.cmp.sd intrinsic with immediate 5, then a two-element shuffle with mask <0, 3>
+}
+
+__m128d test_cmpnge_sd(__m128d a, __m128d b) { // codegen test wrapper around _mm_cmpnge_sd
+ // CHECK: @llvm.x86.sse2.cmp.sd({{.*}}, i8 6)
+ // CHECK: shufflevector <{{.*}}, <2 x i32> <i32 0, i32 3>
+ return _mm_cmpnge_sd(a, b); // expected IR: the sse2.cmp.sd intrinsic with immediate 6, then a two-element shuffle with mask <0, 3>
+}