BUILTIN(__builtin_ia32_ucomisdge, "iV2dV2d", "")
BUILTIN(__builtin_ia32_ucomisdneq, "iV2dV2d", "")
BUILTIN(__builtin_ia32_cmpps, "V4fV4fV4fIc", "")
+BUILTIN(__builtin_ia32_cmpeqps, "V4fV4fV4f", "")
+BUILTIN(__builtin_ia32_cmpltps, "V4fV4fV4f", "")
+BUILTIN(__builtin_ia32_cmpleps, "V4fV4fV4f", "")
+BUILTIN(__builtin_ia32_cmpunordps, "V4fV4fV4f", "")
+BUILTIN(__builtin_ia32_cmpneqps, "V4fV4fV4f", "")
+BUILTIN(__builtin_ia32_cmpnltps, "V4fV4fV4f", "")
+BUILTIN(__builtin_ia32_cmpnleps, "V4fV4fV4f", "")
+BUILTIN(__builtin_ia32_cmpordps, "V4fV4fV4f", "")
BUILTIN(__builtin_ia32_cmpss, "V4fV4fV4fIc", "")
+BUILTIN(__builtin_ia32_cmpeqss, "V4fV4fV4f", "")
+BUILTIN(__builtin_ia32_cmpltss, "V4fV4fV4f", "")
+BUILTIN(__builtin_ia32_cmpless, "V4fV4fV4f", "")
+BUILTIN(__builtin_ia32_cmpunordss, "V4fV4fV4f", "")
+BUILTIN(__builtin_ia32_cmpneqss, "V4fV4fV4f", "")
+BUILTIN(__builtin_ia32_cmpnltss, "V4fV4fV4f", "")
+BUILTIN(__builtin_ia32_cmpnless, "V4fV4fV4f", "")
+BUILTIN(__builtin_ia32_cmpordss, "V4fV4fV4f", "")
BUILTIN(__builtin_ia32_minps, "V4fV4fV4f", "")
BUILTIN(__builtin_ia32_maxps, "V4fV4fV4f", "")
BUILTIN(__builtin_ia32_minss, "V4fV4fV4f", "")
BUILTIN(__builtin_ia32_maxss, "V4fV4fV4f", "")
BUILTIN(__builtin_ia32_cmppd, "V2dV2dV2dIc", "")
+BUILTIN(__builtin_ia32_cmpeqpd, "V2dV2dV2d", "")
+BUILTIN(__builtin_ia32_cmpltpd, "V2dV2dV2d", "")
+BUILTIN(__builtin_ia32_cmplepd, "V2dV2dV2d", "")
+BUILTIN(__builtin_ia32_cmpunordpd, "V2dV2dV2d", "")
+BUILTIN(__builtin_ia32_cmpneqpd, "V2dV2dV2d", "")
+BUILTIN(__builtin_ia32_cmpnltpd, "V2dV2dV2d", "")
+BUILTIN(__builtin_ia32_cmpnlepd, "V2dV2dV2d", "")
+BUILTIN(__builtin_ia32_cmpordpd, "V2dV2dV2d", "")
BUILTIN(__builtin_ia32_cmpsd, "V2dV2dV2dIc", "")
+BUILTIN(__builtin_ia32_cmpeqsd, "V2dV2dV2d", "")
+BUILTIN(__builtin_ia32_cmpltsd, "V2dV2dV2d", "")
+BUILTIN(__builtin_ia32_cmplesd, "V2dV2dV2d", "")
+BUILTIN(__builtin_ia32_cmpunordsd, "V2dV2dV2d", "")
+BUILTIN(__builtin_ia32_cmpneqsd, "V2dV2dV2d", "")
+BUILTIN(__builtin_ia32_cmpnltsd, "V2dV2dV2d", "")
+BUILTIN(__builtin_ia32_cmpnlesd, "V2dV2dV2d", "")
+BUILTIN(__builtin_ia32_cmpordsd, "V2dV2dV2d", "")
BUILTIN(__builtin_ia32_minpd, "V2dV2dV2d", "")
BUILTIN(__builtin_ia32_maxpd, "V2dV2dV2d", "")
BUILTIN(__builtin_ia32_minsd, "V2dV2dV2d", "")
// 3DNow!
case X86::BI__builtin_ia32_pswapdsf:
case X86::BI__builtin_ia32_pswapdsi: {
- const char *name = nullptr;
- Intrinsic::ID ID = Intrinsic::not_intrinsic;
+ const char *name;
+ Intrinsic::ID ID;
switch(BuiltinID) {
default: llvm_unreachable("Unsupported intrinsic!");
case X86::BI__builtin_ia32_pswapdsf:
Value *F = CGM.getIntrinsic(Intrinsic::x86_avx2_vbroadcasti128);
return Builder.CreateCall(F, Builder.CreateBitCast(VecTmp, Int8PtrTy));
}
+ // SSE comparison intrisics
+ case X86::BI__builtin_ia32_cmpeqps:
+ case X86::BI__builtin_ia32_cmpltps:
+ case X86::BI__builtin_ia32_cmpleps:
+ case X86::BI__builtin_ia32_cmpunordps:
+ case X86::BI__builtin_ia32_cmpneqps:
+ case X86::BI__builtin_ia32_cmpnltps:
+ case X86::BI__builtin_ia32_cmpnleps:
+ case X86::BI__builtin_ia32_cmpordps:
+ case X86::BI__builtin_ia32_cmpeqss:
+ case X86::BI__builtin_ia32_cmpltss:
+ case X86::BI__builtin_ia32_cmpless:
+ case X86::BI__builtin_ia32_cmpunordss:
+ case X86::BI__builtin_ia32_cmpneqss:
+ case X86::BI__builtin_ia32_cmpnltss:
+ case X86::BI__builtin_ia32_cmpnless:
+ case X86::BI__builtin_ia32_cmpordss:
+ case X86::BI__builtin_ia32_cmpeqpd:
+ case X86::BI__builtin_ia32_cmpltpd:
+ case X86::BI__builtin_ia32_cmplepd:
+ case X86::BI__builtin_ia32_cmpunordpd:
+ case X86::BI__builtin_ia32_cmpneqpd:
+ case X86::BI__builtin_ia32_cmpnltpd:
+ case X86::BI__builtin_ia32_cmpnlepd:
+ case X86::BI__builtin_ia32_cmpordpd:
+ case X86::BI__builtin_ia32_cmpeqsd:
+ case X86::BI__builtin_ia32_cmpltsd:
+ case X86::BI__builtin_ia32_cmplesd:
+ case X86::BI__builtin_ia32_cmpunordsd:
+ case X86::BI__builtin_ia32_cmpneqsd:
+ case X86::BI__builtin_ia32_cmpnltsd:
+ case X86::BI__builtin_ia32_cmpnlesd:
+ case X86::BI__builtin_ia32_cmpordsd:
+ // These exist so that the builtin that takes an immediate can be bounds
+ // checked by clang to avoid passing bad immediates to the backend. Since
+ // AVX has a larger immediate than SSE we would need separate builtins to
+ // do the different bounds checking. Rather than create a clang specific
+ // SSE only builtin, this implements eight separate builtins to match gcc
+ // implementation.
+
+ // Choose the immediate.
+ unsigned Imm;
+ switch (BuiltinID) {
+ default: llvm_unreachable("Unsupported intrinsic!");
+ case X86::BI__builtin_ia32_cmpeqps:
+ case X86::BI__builtin_ia32_cmpeqss:
+ case X86::BI__builtin_ia32_cmpeqpd:
+ case X86::BI__builtin_ia32_cmpeqsd:
+ Imm = 0;
+ break;
+ case X86::BI__builtin_ia32_cmpltps:
+ case X86::BI__builtin_ia32_cmpltss:
+ case X86::BI__builtin_ia32_cmpltpd:
+ case X86::BI__builtin_ia32_cmpltsd:
+ Imm = 1;
+ break;
+ case X86::BI__builtin_ia32_cmpleps:
+ case X86::BI__builtin_ia32_cmpless:
+ case X86::BI__builtin_ia32_cmplepd:
+ case X86::BI__builtin_ia32_cmplesd:
+ Imm = 2;
+ break;
+ case X86::BI__builtin_ia32_cmpunordps:
+ case X86::BI__builtin_ia32_cmpunordss:
+ case X86::BI__builtin_ia32_cmpunordpd:
+ case X86::BI__builtin_ia32_cmpunordsd:
+ Imm = 3;
+ break;
+ case X86::BI__builtin_ia32_cmpneqps:
+ case X86::BI__builtin_ia32_cmpneqss:
+ case X86::BI__builtin_ia32_cmpneqpd:
+ case X86::BI__builtin_ia32_cmpneqsd:
+ Imm = 4;
+ break;
+ case X86::BI__builtin_ia32_cmpnltps:
+ case X86::BI__builtin_ia32_cmpnltss:
+ case X86::BI__builtin_ia32_cmpnltpd:
+ case X86::BI__builtin_ia32_cmpnltsd:
+ Imm = 5;
+ break;
+ case X86::BI__builtin_ia32_cmpnleps:
+ case X86::BI__builtin_ia32_cmpnless:
+ case X86::BI__builtin_ia32_cmpnlepd:
+ case X86::BI__builtin_ia32_cmpnlesd:
+ Imm = 6;
+ break;
+ case X86::BI__builtin_ia32_cmpordps:
+ case X86::BI__builtin_ia32_cmpordss:
+ case X86::BI__builtin_ia32_cmpordpd:
+ case X86::BI__builtin_ia32_cmpordsd:
+ Imm = 7;
+ break;
+ }
+
+ // Choose the intrinsic ID.
+ const char *name;
+ Intrinsic::ID ID;
+ switch (BuiltinID) {
+ default: llvm_unreachable("Unsupported intrinsic!");
+ case X86::BI__builtin_ia32_cmpeqps:
+ case X86::BI__builtin_ia32_cmpltps:
+ case X86::BI__builtin_ia32_cmpleps:
+ case X86::BI__builtin_ia32_cmpunordps:
+ case X86::BI__builtin_ia32_cmpneqps:
+ case X86::BI__builtin_ia32_cmpnltps:
+ case X86::BI__builtin_ia32_cmpnleps:
+ case X86::BI__builtin_ia32_cmpordps:
+ name = "cmpps";
+ ID = Intrinsic::x86_sse_cmp_ps;
+ break;
+ case X86::BI__builtin_ia32_cmpeqss:
+ case X86::BI__builtin_ia32_cmpltss:
+ case X86::BI__builtin_ia32_cmpless:
+ case X86::BI__builtin_ia32_cmpunordss:
+ case X86::BI__builtin_ia32_cmpneqss:
+ case X86::BI__builtin_ia32_cmpnltss:
+ case X86::BI__builtin_ia32_cmpnless:
+ case X86::BI__builtin_ia32_cmpordss:
+ name = "cmpss";
+ ID = Intrinsic::x86_sse_cmp_ss;
+ break;
+ case X86::BI__builtin_ia32_cmpeqpd:
+ case X86::BI__builtin_ia32_cmpltpd:
+ case X86::BI__builtin_ia32_cmplepd:
+ case X86::BI__builtin_ia32_cmpunordpd:
+ case X86::BI__builtin_ia32_cmpneqpd:
+ case X86::BI__builtin_ia32_cmpnltpd:
+ case X86::BI__builtin_ia32_cmpnlepd:
+ case X86::BI__builtin_ia32_cmpordpd:
+ name = "cmppd";
+ ID = Intrinsic::x86_sse2_cmp_pd;
+ break;
+ case X86::BI__builtin_ia32_cmpeqsd:
+ case X86::BI__builtin_ia32_cmpltsd:
+ case X86::BI__builtin_ia32_cmplesd:
+ case X86::BI__builtin_ia32_cmpunordsd:
+ case X86::BI__builtin_ia32_cmpneqsd:
+ case X86::BI__builtin_ia32_cmpnltsd:
+ case X86::BI__builtin_ia32_cmpnlesd:
+ case X86::BI__builtin_ia32_cmpordsd:
+ name = "cmpsd";
+ ID = Intrinsic::x86_sse2_cmp_sd;
+ break;
+ }
+
+ Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
+ llvm::Function *F = CGM.getIntrinsic(ID);
+ return Builder.CreateCall(F, Ops, name);
}
}
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpeq_pd(__m128d __a, __m128d __b)
{
- return (__m128d)__builtin_ia32_cmppd(__a, __b, 0);
+ return (__m128d)__builtin_ia32_cmpeqpd(__a, __b);
}
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmplt_pd(__m128d __a, __m128d __b)
{
- return (__m128d)__builtin_ia32_cmppd(__a, __b, 1);
+ return (__m128d)__builtin_ia32_cmpltpd(__a, __b);
}
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmple_pd(__m128d __a, __m128d __b)
{
- return (__m128d)__builtin_ia32_cmppd(__a, __b, 2);
+ return (__m128d)__builtin_ia32_cmplepd(__a, __b);
}
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpgt_pd(__m128d __a, __m128d __b)
{
- return (__m128d)__builtin_ia32_cmppd(__b, __a, 1);
+ return (__m128d)__builtin_ia32_cmpltpd(__b, __a);
}
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpge_pd(__m128d __a, __m128d __b)
{
- return (__m128d)__builtin_ia32_cmppd(__b, __a, 2);
+ return (__m128d)__builtin_ia32_cmplepd(__b, __a);
}
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpord_pd(__m128d __a, __m128d __b)
{
- return (__m128d)__builtin_ia32_cmppd(__a, __b, 7);
+ return (__m128d)__builtin_ia32_cmpordpd(__a, __b);
}
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpunord_pd(__m128d __a, __m128d __b)
{
- return (__m128d)__builtin_ia32_cmppd(__a, __b, 3);
+ return (__m128d)__builtin_ia32_cmpunordpd(__a, __b);
}
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpneq_pd(__m128d __a, __m128d __b)
{
- return (__m128d)__builtin_ia32_cmppd(__a, __b, 4);
+ return (__m128d)__builtin_ia32_cmpneqpd(__a, __b);
}
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpnlt_pd(__m128d __a, __m128d __b)
{
- return (__m128d)__builtin_ia32_cmppd(__a, __b, 5);
+ return (__m128d)__builtin_ia32_cmpnltpd(__a, __b);
}
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpnle_pd(__m128d __a, __m128d __b)
{
- return (__m128d)__builtin_ia32_cmppd(__a, __b, 6);
+ return (__m128d)__builtin_ia32_cmpnlepd(__a, __b);
}
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpngt_pd(__m128d __a, __m128d __b)
{
- return (__m128d)__builtin_ia32_cmppd(__b, __a, 5);
+ return (__m128d)__builtin_ia32_cmpnltpd(__b, __a);
}
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpnge_pd(__m128d __a, __m128d __b)
{
- return (__m128d)__builtin_ia32_cmppd(__b, __a, 6);
+ return (__m128d)__builtin_ia32_cmpnlepd(__b, __a);
}
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpeq_sd(__m128d __a, __m128d __b)
{
- return (__m128d)__builtin_ia32_cmpsd(__a, __b, 0);
+ return (__m128d)__builtin_ia32_cmpeqsd(__a, __b);
}
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmplt_sd(__m128d __a, __m128d __b)
{
- return (__m128d)__builtin_ia32_cmpsd(__a, __b, 1);
+ return (__m128d)__builtin_ia32_cmpltsd(__a, __b);
}
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmple_sd(__m128d __a, __m128d __b)
{
- return (__m128d)__builtin_ia32_cmpsd(__a, __b, 2);
+ return (__m128d)__builtin_ia32_cmplesd(__a, __b);
}
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpgt_sd(__m128d __a, __m128d __b)
{
- __m128d __c = __builtin_ia32_cmpsd(__b, __a, 1);
+ __m128d __c = __builtin_ia32_cmpltsd(__b, __a);
return (__m128d) { __c[0], __a[1] };
}
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpge_sd(__m128d __a, __m128d __b)
{
- __m128d __c = __builtin_ia32_cmpsd(__b, __a, 2);
+ __m128d __c = __builtin_ia32_cmplesd(__b, __a);
return (__m128d) { __c[0], __a[1] };
}
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpord_sd(__m128d __a, __m128d __b)
{
- return (__m128d)__builtin_ia32_cmpsd(__a, __b, 7);
+ return (__m128d)__builtin_ia32_cmpordsd(__a, __b);
}
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpunord_sd(__m128d __a, __m128d __b)
{
- return (__m128d)__builtin_ia32_cmpsd(__a, __b, 3);
+ return (__m128d)__builtin_ia32_cmpunordsd(__a, __b);
}
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpneq_sd(__m128d __a, __m128d __b)
{
- return (__m128d)__builtin_ia32_cmpsd(__a, __b, 4);
+ return (__m128d)__builtin_ia32_cmpneqsd(__a, __b);
}
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpnlt_sd(__m128d __a, __m128d __b)
{
- return (__m128d)__builtin_ia32_cmpsd(__a, __b, 5);
+ return (__m128d)__builtin_ia32_cmpnltsd(__a, __b);
}
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpnle_sd(__m128d __a, __m128d __b)
{
- return (__m128d)__builtin_ia32_cmpsd(__a, __b, 6);
+ return (__m128d)__builtin_ia32_cmpnlesd(__a, __b);
}
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpngt_sd(__m128d __a, __m128d __b)
{
- __m128d __c = __builtin_ia32_cmpsd(__b, __a, 5);
+ __m128d __c = __builtin_ia32_cmpnltsd(__b, __a);
return (__m128d) { __c[0], __a[1] };
}
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpnge_sd(__m128d __a, __m128d __b)
{
- __m128d __c = __builtin_ia32_cmpsd(__b, __a, 6);
+ __m128d __c = __builtin_ia32_cmpnlesd(__b, __a);
return (__m128d) { __c[0], __a[1] };
}
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmpeq_ss(__m128 __a, __m128 __b)
{
- return (__m128)__builtin_ia32_cmpss(__a, __b, 0);
+ return (__m128)__builtin_ia32_cmpeqss(__a, __b);
}
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmpeq_ps(__m128 __a, __m128 __b)
{
- return (__m128)__builtin_ia32_cmpps(__a, __b, 0);
+ return (__m128)__builtin_ia32_cmpeqps(__a, __b);
}
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmplt_ss(__m128 __a, __m128 __b)
{
- return (__m128)__builtin_ia32_cmpss(__a, __b, 1);
+ return (__m128)__builtin_ia32_cmpltss(__a, __b);
}
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmplt_ps(__m128 __a, __m128 __b)
{
- return (__m128)__builtin_ia32_cmpps(__a, __b, 1);
+ return (__m128)__builtin_ia32_cmpltps(__a, __b);
}
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmple_ss(__m128 __a, __m128 __b)
{
- return (__m128)__builtin_ia32_cmpss(__a, __b, 2);
+ return (__m128)__builtin_ia32_cmpless(__a, __b);
}
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmple_ps(__m128 __a, __m128 __b)
{
- return (__m128)__builtin_ia32_cmpps(__a, __b, 2);
+ return (__m128)__builtin_ia32_cmpleps(__a, __b);
}
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmpgt_ss(__m128 __a, __m128 __b)
{
return (__m128)__builtin_shufflevector(__a,
- __builtin_ia32_cmpss(__b, __a, 1),
+ __builtin_ia32_cmpltss(__b, __a),
4, 1, 2, 3);
}
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmpgt_ps(__m128 __a, __m128 __b)
{
- return (__m128)__builtin_ia32_cmpps(__b, __a, 1);
+ return (__m128)__builtin_ia32_cmpltps(__b, __a);
}
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmpge_ss(__m128 __a, __m128 __b)
{
return (__m128)__builtin_shufflevector(__a,
- __builtin_ia32_cmpss(__b, __a, 2),
+ __builtin_ia32_cmpless(__b, __a),
4, 1, 2, 3);
}
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmpge_ps(__m128 __a, __m128 __b)
{
- return (__m128)__builtin_ia32_cmpps(__b, __a, 2);
+ return (__m128)__builtin_ia32_cmpleps(__b, __a);
}
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmpneq_ss(__m128 __a, __m128 __b)
{
- return (__m128)__builtin_ia32_cmpss(__a, __b, 4);
+ return (__m128)__builtin_ia32_cmpneqss(__a, __b);
}
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmpneq_ps(__m128 __a, __m128 __b)
{
- return (__m128)__builtin_ia32_cmpps(__a, __b, 4);
+ return (__m128)__builtin_ia32_cmpneqps(__a, __b);
}
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmpnlt_ss(__m128 __a, __m128 __b)
{
- return (__m128)__builtin_ia32_cmpss(__a, __b, 5);
+ return (__m128)__builtin_ia32_cmpnltss(__a, __b);
}
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmpnlt_ps(__m128 __a, __m128 __b)
{
- return (__m128)__builtin_ia32_cmpps(__a, __b, 5);
+ return (__m128)__builtin_ia32_cmpnltps(__a, __b);
}
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmpnle_ss(__m128 __a, __m128 __b)
{
- return (__m128)__builtin_ia32_cmpss(__a, __b, 6);
+ return (__m128)__builtin_ia32_cmpnless(__a, __b);
}
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmpnle_ps(__m128 __a, __m128 __b)
{
- return (__m128)__builtin_ia32_cmpps(__a, __b, 6);
+ return (__m128)__builtin_ia32_cmpnleps(__a, __b);
}
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmpngt_ss(__m128 __a, __m128 __b)
{
return (__m128)__builtin_shufflevector(__a,
- __builtin_ia32_cmpss(__b, __a, 5),
+ __builtin_ia32_cmpnltss(__b, __a),
4, 1, 2, 3);
}
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmpngt_ps(__m128 __a, __m128 __b)
{
- return (__m128)__builtin_ia32_cmpps(__b, __a, 5);
+ return (__m128)__builtin_ia32_cmpnltps(__b, __a);
}
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmpnge_ss(__m128 __a, __m128 __b)
{
return (__m128)__builtin_shufflevector(__a,
- __builtin_ia32_cmpss(__b, __a, 6),
+ __builtin_ia32_cmpnless(__b, __a),
4, 1, 2, 3);
}
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmpnge_ps(__m128 __a, __m128 __b)
{
- return (__m128)__builtin_ia32_cmpps(__b, __a, 6);
+ return (__m128)__builtin_ia32_cmpnleps(__b, __a);
}
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmpord_ss(__m128 __a, __m128 __b)
{
- return (__m128)__builtin_ia32_cmpss(__a, __b, 7);
+ return (__m128)__builtin_ia32_cmpordss(__a, __b);
}
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmpord_ps(__m128 __a, __m128 __b)
{
- return (__m128)__builtin_ia32_cmpps(__a, __b, 7);
+ return (__m128)__builtin_ia32_cmpordps(__a, __b);
}
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmpunord_ss(__m128 __a, __m128 __b)
{
- return (__m128)__builtin_ia32_cmpss(__a, __b, 3);
+ return (__m128)__builtin_ia32_cmpunordss(__a, __b);
}
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmpunord_ps(__m128 __a, __m128 __b)
{
- return (__m128)__builtin_ia32_cmpps(__a, __b, 3);
+ return (__m128)__builtin_ia32_cmpunordps(__a, __b);
}
static __inline__ int __attribute__((__always_inline__, __nodebug__))
// CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 7>
return _mm_blend_epi16(V1, V2, 42);
}
+
+__m128 test_mm_cmpeq_ss(__m128 __a, __m128 __b) {
+ // CHECK-LABEL: @test_mm_cmpeq_ss
+ // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 0)
+ return _mm_cmpeq_ss(__a, __b);
+}
+
+__m128 test_mm_cmplt_ss(__m128 __a, __m128 __b) {
+ // CHECK-LABEL: @test_mm_cmplt_ss
+ // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 1)
+ return _mm_cmplt_ss(__a, __b);
+}
+
+__m128 test_mm_cmple_ss(__m128 __a, __m128 __b) {
+ // CHECK-LABEL: @test_mm_cmple_ss
+ // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 2)
+ return _mm_cmple_ss(__a, __b);
+}
+
+__m128 test_mm_cmpunord_ss(__m128 __a, __m128 __b) {
+ // CHECK-LABEL: @test_mm_cmpunord_ss
+ // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 3)
+ return _mm_cmpunord_ss(__a, __b);
+}
+
+__m128 test_mm_cmpneq_ss(__m128 __a, __m128 __b) {
+ // CHECK-LABEL: @test_mm_cmpneq_ss
+ // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 4)
+ return _mm_cmpneq_ss(__a, __b);
+}
+
+__m128 test_mm_cmpnlt_ss(__m128 __a, __m128 __b) {
+ // CHECK-LABEL: @test_mm_cmpnlt_ss
+ // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 5)
+ return _mm_cmpnlt_ss(__a, __b);
+}
+
+__m128 test_mm_cmpnle_ss(__m128 __a, __m128 __b) {
+ // CHECK-LABEL: @test_mm_cmpnle_ss
+ // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 6)
+ return _mm_cmpnle_ss(__a, __b);
+}
+
+__m128 test_mm_cmpord_ss(__m128 __a, __m128 __b) {
+ // CHECK-LABEL: @test_mm_cmpord_ss
+ // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 7)
+ return _mm_cmpord_ss(__a, __b);
+}
+
+__m128 test_mm_cmpgt_ss(__m128 __a, __m128 __b) {
+ // CHECK-LABEL: @test_mm_cmpgt_ss
+ // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 1)
+ return _mm_cmpgt_ss(__a, __b);
+}
+
+__m128 test_mm_cmpge_ss(__m128 __a, __m128 __b) {
+ // CHECK-LABEL: @test_mm_cmpge_ss
+ // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 2)
+ return _mm_cmpge_ss(__a, __b);
+}
+
+__m128 test_mm_cmpngt_ss(__m128 __a, __m128 __b) {
+ // CHECK-LABEL: @test_mm_cmpngt_ss
+ // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 5)
+ return _mm_cmpngt_ss(__a, __b);
+}
+
+__m128 test_mm_cmpnge_ss(__m128 __a, __m128 __b) {
+ // CHECK-LABEL: @test_mm_cmpnge_ss
+ // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 6)
+ return _mm_cmpnge_ss(__a, __b);
+}
+
+__m128 test_mm_cmpeq_ps(__m128 __a, __m128 __b) {
+ // CHECK-LABEL: @test_mm_cmpeq_ps
+ // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 0)
+ return _mm_cmpeq_ps(__a, __b);
+}
+
+__m128 test_mm_cmplt_ps(__m128 __a, __m128 __b) {
+ // CHECK-LABEL: @test_mm_cmplt_ps
+ // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 1)
+ return _mm_cmplt_ps(__a, __b);
+}
+
+__m128 test_mm_cmple_ps(__m128 __a, __m128 __b) {
+ // CHECK-LABEL: @test_mm_cmple_ps
+ // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 2)
+ return _mm_cmple_ps(__a, __b);
+}
+
+__m128 test_mm_cmpunord_ps(__m128 __a, __m128 __b) {
+ // CHECK-LABEL: @test_mm_cmpunord_ps
+ // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 3)
+ return _mm_cmpunord_ps(__a, __b);
+}
+
+__m128 test_mm_cmpneq_ps(__m128 __a, __m128 __b) {
+ // CHECK-LABEL: @test_mm_cmpneq_ps
+ // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 4)
+ return _mm_cmpneq_ps(__a, __b);
+}
+
+__m128 test_mm_cmpnlt_ps(__m128 __a, __m128 __b) {
+ // CHECK-LABEL: @test_mm_cmpnlt_ps
+ // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 5)
+ return _mm_cmpnlt_ps(__a, __b);
+}
+
+__m128 test_mm_cmpnle_ps(__m128 __a, __m128 __b) {
+ // CHECK-LABEL: @test_mm_cmpnle_ps
+ // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 6)
+ return _mm_cmpnle_ps(__a, __b);
+}
+
+__m128 test_mm_cmpord_ps(__m128 __a, __m128 __b) {
+ // CHECK-LABEL: @test_mm_cmpord_ps
+ // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 7)
+ return _mm_cmpord_ps(__a, __b);
+}
+
+__m128 test_mm_cmpgt_ps(__m128 __a, __m128 __b) {
+ // CHECK-LABEL: @test_mm_cmpgt_ps
+ // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 1)
+ return _mm_cmpgt_ps(__a, __b);
+}
+
+__m128 test_mm_cmpge_ps(__m128 __a, __m128 __b) {
+ // CHECK-LABEL: @test_mm_cmpge_ps
+ // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 2)
+ return _mm_cmpge_ps(__a, __b);
+}
+
+__m128 test_mm_cmpngt_ps(__m128 __a, __m128 __b) {
+ // CHECK-LABEL: @test_mm_cmpngt_ps
+ // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 5)
+ return _mm_cmpngt_ps(__a, __b);
+}
+
+__m128 test_mm_cmpnge_ps(__m128 __a, __m128 __b) {
+ // CHECK-LABEL: @test_mm_cmpnge_ps
+ // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 6)
+ return _mm_cmpnge_ps(__a, __b);
+}
+
+__m128d test_mm_cmpeq_sd(__m128d __a, __m128d __b) {
+ // CHECK-LABEL: @test_mm_cmpeq_sd
+ // CHECK: @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 0)
+ return _mm_cmpeq_sd(__a, __b);
+}
+
+__m128d test_mm_cmplt_sd(__m128d __a, __m128d __b) {
+ // CHECK-LABEL: @test_mm_cmplt_sd
+ // CHECK: @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 1)
+ return _mm_cmplt_sd(__a, __b);
+}
+
+__m128d test_mm_cmple_sd(__m128d __a, __m128d __b) {
+ // CHECK-LABEL: @test_mm_cmple_sd
+ // CHECK: @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 2)
+ return _mm_cmple_sd(__a, __b);
+}
+
+__m128d test_mm_cmpunord_sd(__m128d __a, __m128d __b) {
+ // CHECK-LABEL: @test_mm_cmpunord_sd
+ // CHECK: @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 3)
+ return _mm_cmpunord_sd(__a, __b);
+}
+
+__m128d test_mm_cmpneq_sd(__m128d __a, __m128d __b) {
+ // CHECK-LABEL: @test_mm_cmpneq_sd
+ // CHECK: @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 4)
+ return _mm_cmpneq_sd(__a, __b);
+}
+
+__m128d test_mm_cmpnlt_sd(__m128d __a, __m128d __b) {
+ // CHECK-LABEL: @test_mm_cmpnlt_sd
+ // CHECK: @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 5)
+ return _mm_cmpnlt_sd(__a, __b);
+}
+
+__m128d test_mm_cmpnle_sd(__m128d __a, __m128d __b) {
+ // CHECK-LABEL: @test_mm_cmpnle_sd
+ // CHECK: @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 6)
+ return _mm_cmpnle_sd(__a, __b);
+}
+
+__m128d test_mm_cmpord_sd(__m128d __a, __m128d __b) {
+ // CHECK-LABEL: @test_mm_cmpord_sd
+ // CHECK: @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 7)
+ return _mm_cmpord_sd(__a, __b);
+}
+
+__m128d test_mm_cmpgt_sd(__m128d __a, __m128d __b) {
+ // CHECK-LABEL: @test_mm_cmpgt_sd
+ // CHECK: @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 1)
+ return _mm_cmpgt_sd(__a, __b);
+}
+
+__m128d test_mm_cmpge_sd(__m128d __a, __m128d __b) {
+ // CHECK-LABEL: @test_mm_cmpge_sd
+ // CHECK: @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 2)
+ return _mm_cmpge_sd(__a, __b);
+}
+
+__m128d test_mm_cmpngt_sd(__m128d __a, __m128d __b) {
+ // CHECK-LABEL: @test_mm_cmpngt_sd
+ // CHECK: @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 5)
+ return _mm_cmpngt_sd(__a, __b);
+}
+
+__m128d test_mm_cmpnge_sd(__m128d __a, __m128d __b) {
+ // CHECK-LABEL: @test_mm_cmpnge_sd
+ // CHECK: @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 6)
+ return _mm_cmpnge_sd(__a, __b);
+}
+
+__m128d test_mm_cmpeq_pd(__m128d __a, __m128d __b) {
+ // CHECK-LABEL: @test_mm_cmpeq_pd
+ // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 0)
+ return _mm_cmpeq_pd(__a, __b);
+}
+
+__m128d test_mm_cmplt_pd(__m128d __a, __m128d __b) {
+ // CHECK-LABEL: @test_mm_cmplt_pd
+ // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 1)
+ return _mm_cmplt_pd(__a, __b);
+}
+
+__m128d test_mm_cmple_pd(__m128d __a, __m128d __b) {
+ // CHECK-LABEL: @test_mm_cmple_pd
+ // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 2)
+ return _mm_cmple_pd(__a, __b);
+}
+
+__m128d test_mm_cmpunord_pd(__m128d __a, __m128d __b) {
+ // CHECK-LABEL: @test_mm_cmpunord_pd
+ // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 3)
+ return _mm_cmpunord_pd(__a, __b);
+}
+
+__m128d test_mm_cmpneq_pd(__m128d __a, __m128d __b) {
+ // CHECK-LABEL: @test_mm_cmpneq_pd
+ // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 4)
+ return _mm_cmpneq_pd(__a, __b);
+}
+
+__m128d test_mm_cmpnlt_pd(__m128d __a, __m128d __b) {
+ // CHECK-LABEL: @test_mm_cmpnlt_pd
+ // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 5)
+ return _mm_cmpnlt_pd(__a, __b);
+}
+
+__m128d test_mm_cmpnle_pd(__m128d __a, __m128d __b) {
+ // CHECK-LABEL: @test_mm_cmpnle_pd
+ // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 6)
+ return _mm_cmpnle_pd(__a, __b);
+}
+
+__m128d test_mm_cmpord_pd(__m128d __a, __m128d __b) {
+ // CHECK-LABEL: @test_mm_cmpord_pd
+ // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 7)
+ return _mm_cmpord_pd(__a, __b);
+}
+
+__m128d test_mm_cmpgt_pd(__m128d __a, __m128d __b) {
+ // CHECK-LABEL: @test_mm_cmpgt_pd
+ // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 1)
+ return _mm_cmpgt_pd(__a, __b);
+}
+
+__m128d test_mm_cmpge_pd(__m128d __a, __m128d __b) {
+ // CHECK-LABEL: @test_mm_cmpge_pd
+ // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 2)
+ return _mm_cmpge_pd(__a, __b);
+}
+
+__m128d test_mm_cmpngt_pd(__m128d __a, __m128d __b) {
+ // CHECK-LABEL: @test_mm_cmpngt_pd
+ // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 5)
+ return _mm_cmpngt_pd(__a, __b);
+}
+
+__m128d test_mm_cmpnge_pd(__m128d __a, __m128d __b) {
+ // CHECK-LABEL: @test_mm_cmpnge_pd
+ // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 6)
+ return _mm_cmpnge_pd(__a, __b);
+}