BUILTIN(__builtin_ia32_subss, "V4fV4fV4f", "")
BUILTIN(__builtin_ia32_mulss, "V4fV4fV4f", "")
BUILTIN(__builtin_ia32_divss, "V4fV4fV4f", "")
-BUILTIN(__builtin_ia32_cmpeqps, "V4iV4fV4f", "")
-BUILTIN(__builtin_ia32_cmpltps, "V4iV4fV4f", "")
-BUILTIN(__builtin_ia32_cmpleps, "V4iV4fV4f", "")
-BUILTIN(__builtin_ia32_cmpgtps, "V4iV4fV4f", "")
-BUILTIN(__builtin_ia32_cmpgeps, "V4iV4fV4f", "")
-BUILTIN(__builtin_ia32_cmpunordps, "V4iV4fV4f", "")
-BUILTIN(__builtin_ia32_cmpneqps, "V4iV4fV4f", "")
-BUILTIN(__builtin_ia32_cmpnltps, "V4iV4fV4f", "")
-BUILTIN(__builtin_ia32_cmpnleps, "V4iV4fV4f", "")
-BUILTIN(__builtin_ia32_cmpngtps, "V4iV4fV4f", "")
-BUILTIN(__builtin_ia32_cmpngeps, "V4iV4fV4f", "")
-BUILTIN(__builtin_ia32_cmpordps, "V4iV4fV4f", "")
-BUILTIN(__builtin_ia32_cmpeqss, "V4iV4fV4f", "")
-BUILTIN(__builtin_ia32_cmpltss, "V4iV4fV4f", "")
-BUILTIN(__builtin_ia32_cmpless, "V4iV4fV4f", "")
-BUILTIN(__builtin_ia32_cmpunordss, "V4iV4fV4f", "")
-BUILTIN(__builtin_ia32_cmpneqss, "V4iV4fV4f", "")
-BUILTIN(__builtin_ia32_cmpnltss, "V4iV4fV4f", "")
-BUILTIN(__builtin_ia32_cmpnless, "V4iV4fV4f", "")
-BUILTIN(__builtin_ia32_cmpngtss, "V4iV4fV4f", "")
-BUILTIN(__builtin_ia32_cmpngess, "V4iV4fV4f", "")
-BUILTIN(__builtin_ia32_cmpordss, "V4iV4fV4f", "")
+BUILTIN(__builtin_ia32_cmpps, "V4fV4fV4fc", "") // packed float compare; callers pass the predicate (0..7) as the third argument
+BUILTIN(__builtin_ia32_cmpss, "V4fV4fV4fc", "") // scalar float compare; callers pass the predicate (0..7) as the third argument
BUILTIN(__builtin_ia32_minps, "V4fV4fV4f", "")
BUILTIN(__builtin_ia32_maxps, "V4fV4fV4f", "")
BUILTIN(__builtin_ia32_minss, "V4fV4fV4f", "")
BUILTIN(__builtin_ia32_subsd, "V2dV2dV2d", "")
BUILTIN(__builtin_ia32_mulsd, "V2dV2dV2d", "")
BUILTIN(__builtin_ia32_divsd, "V2dV2dV2d", "")
-BUILTIN(__builtin_ia32_cmpeqpd, "V4iV2dV2d", "")
-BUILTIN(__builtin_ia32_cmpltpd, "V4iV2dV2d", "")
-BUILTIN(__builtin_ia32_cmplepd, "V4iV2dV2d", "")
-BUILTIN(__builtin_ia32_cmpgtpd, "V4iV2dV2d", "")
-BUILTIN(__builtin_ia32_cmpgepd, "V4iV2dV2d", "")
-BUILTIN(__builtin_ia32_cmpunordpd, "V4iV2dV2d", "")
-BUILTIN(__builtin_ia32_cmpneqpd, "V4iV2dV2d", "")
-BUILTIN(__builtin_ia32_cmpnltpd, "V4iV2dV2d", "")
-BUILTIN(__builtin_ia32_cmpnlepd, "V4iV2dV2d", "")
-BUILTIN(__builtin_ia32_cmpngtpd, "V4iV2dV2d", "")
-BUILTIN(__builtin_ia32_cmpngepd, "V4iV2dV2d", "")
-BUILTIN(__builtin_ia32_cmpordpd, "V4iV2dV2d", "")
-BUILTIN(__builtin_ia32_cmpeqsd, "V4iV2dV2d", "")
-BUILTIN(__builtin_ia32_cmpltsd, "V4iV2dV2d", "")
-BUILTIN(__builtin_ia32_cmplesd, "V4iV2dV2d", "")
-BUILTIN(__builtin_ia32_cmpunordsd, "V4iV2dV2d", "")
-BUILTIN(__builtin_ia32_cmpneqsd, "V4iV2dV2d", "")
-BUILTIN(__builtin_ia32_cmpnltsd, "V4iV2dV2d", "")
-BUILTIN(__builtin_ia32_cmpnlesd, "V4iV2dV2d", "")
-BUILTIN(__builtin_ia32_cmpordsd, "V4iV2dV2d", "")
+BUILTIN(__builtin_ia32_cmppd, "V2dV2dV2dc", "") // packed double compare; callers pass the predicate (0..7) as the third argument
+BUILTIN(__builtin_ia32_cmpsd, "V2dV2dV2dc", "") // scalar double compare; callers pass the predicate (0..7) as the third argument
BUILTIN(__builtin_ia32_minpd, "V2dV2dV2d", "")
BUILTIN(__builtin_ia32_maxpd, "V2dV2dV2d", "")
BUILTIN(__builtin_ia32_minsd, "V2dV2dV2d", "")
case X86::BI__builtin_ia32_vec_ext_v4hi:
case X86::BI__builtin_ia32_vec_ext_v2df:
return Builder.CreateExtractElement(Ops[0], Ops[1], "result");
- case X86::BI__builtin_ia32_cmpordss:
- case X86::BI__builtin_ia32_cmpordsd:
- case X86::BI__builtin_ia32_cmpunordss:
- case X86::BI__builtin_ia32_cmpunordsd:
- case X86::BI__builtin_ia32_cmpeqss:
- case X86::BI__builtin_ia32_cmpeqsd:
- case X86::BI__builtin_ia32_cmpltss:
- case X86::BI__builtin_ia32_cmpltsd:
- case X86::BI__builtin_ia32_cmpless:
- case X86::BI__builtin_ia32_cmplesd:
- case X86::BI__builtin_ia32_cmpneqss:
- case X86::BI__builtin_ia32_cmpneqsd:
- case X86::BI__builtin_ia32_cmpnltss:
- case X86::BI__builtin_ia32_cmpnltsd:
- case X86::BI__builtin_ia32_cmpnless:
- case X86::BI__builtin_ia32_cmpnlesd: {
- unsigned i = 0;
- const char *name = 0;
- switch (BuiltinID) {
- default: assert(0 && "Unknown compare builtin!");
- case X86::BI__builtin_ia32_cmpeqss:
- case X86::BI__builtin_ia32_cmpeqsd:
- i = 0;
- name = "cmpeq";
- break;
- case X86::BI__builtin_ia32_cmpltss:
- case X86::BI__builtin_ia32_cmpltsd:
- i = 1;
- name = "cmplt";
- break;
- case X86::BI__builtin_ia32_cmpless:
- case X86::BI__builtin_ia32_cmplesd:
- i = 2;
- name = "cmple";
- break;
- case X86::BI__builtin_ia32_cmpunordss:
- case X86::BI__builtin_ia32_cmpunordsd:
- i = 3;
- name = "cmpunord";
- break;
- case X86::BI__builtin_ia32_cmpneqss:
- case X86::BI__builtin_ia32_cmpneqsd:
- i = 4;
- name = "cmpneq";
- break;
- case X86::BI__builtin_ia32_cmpnltss:
- case X86::BI__builtin_ia32_cmpnltsd:
- i = 5;
- name = "cmpntl";
- break;
- case X86::BI__builtin_ia32_cmpnless:
- case X86::BI__builtin_ia32_cmpnlesd:
- i = 6;
- name = "cmpnle";
- break;
- case X86::BI__builtin_ia32_cmpordss:
- case X86::BI__builtin_ia32_cmpordsd:
- i = 7;
- name = "cmpord";
- break;
- }
-
- llvm::Function *F;
- if (cast<llvm::VectorType>(Ops[0]->getType())->getElementType() ==
- llvm::Type::FloatTy)
- F = CGM.getIntrinsic(Intrinsic::x86_sse_cmp_ss);
- else
- F = CGM.getIntrinsic(Intrinsic::x86_sse2_cmp_sd);
-
- Ops.push_back(llvm::ConstantInt::get(llvm::Type::Int8Ty, i));
- return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size(), name);
+ case X86::BI__builtin_ia32_cmpps: {
+ llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse_cmp_ps);
+ return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size(), "cmpps");
+ }
+ case X86::BI__builtin_ia32_cmpss: {
+ llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse_cmp_ss);
+ return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size(), "cmpss");
}
case X86::BI__builtin_ia32_ldmxcsr: {
llvm::Type *PtrTy = llvm::PointerType::getUnqual(llvm::Type::Int8Ty);
Builder.CreateBitCast(Tmp, PtrTy));
return Builder.CreateLoad(Tmp, "stmxcsr");
}
- case X86::BI__builtin_ia32_cmpordps:
- case X86::BI__builtin_ia32_cmpordpd:
- case X86::BI__builtin_ia32_cmpunordps:
- case X86::BI__builtin_ia32_cmpunordpd:
- case X86::BI__builtin_ia32_cmpeqps:
- case X86::BI__builtin_ia32_cmpeqpd:
- case X86::BI__builtin_ia32_cmpltps:
- case X86::BI__builtin_ia32_cmpltpd:
- case X86::BI__builtin_ia32_cmpleps:
- case X86::BI__builtin_ia32_cmplepd:
- case X86::BI__builtin_ia32_cmpneqps:
- case X86::BI__builtin_ia32_cmpneqpd:
- case X86::BI__builtin_ia32_cmpngtps:
- case X86::BI__builtin_ia32_cmpngtpd:
- case X86::BI__builtin_ia32_cmpnltps:
- case X86::BI__builtin_ia32_cmpnltpd:
- case X86::BI__builtin_ia32_cmpgtps:
- case X86::BI__builtin_ia32_cmpgtpd:
- case X86::BI__builtin_ia32_cmpgeps:
- case X86::BI__builtin_ia32_cmpgepd:
- case X86::BI__builtin_ia32_cmpngeps:
- case X86::BI__builtin_ia32_cmpngepd:
- case X86::BI__builtin_ia32_cmpnleps:
- case X86::BI__builtin_ia32_cmpnlepd: {
- unsigned i = 0;
- const char *name = 0;
- bool ShouldSwap = false;
- switch (BuiltinID) {
- default: assert(0 && "Unknown compare builtin!");
- case X86::BI__builtin_ia32_cmpeqps:
- case X86::BI__builtin_ia32_cmpeqpd: i = 0; name = "cmpeq"; break;
- case X86::BI__builtin_ia32_cmpltps:
- case X86::BI__builtin_ia32_cmpltpd: i = 1; name = "cmplt"; break;
- case X86::BI__builtin_ia32_cmpleps:
- case X86::BI__builtin_ia32_cmplepd: i = 2; name = "cmple"; break;
- case X86::BI__builtin_ia32_cmpunordps:
- case X86::BI__builtin_ia32_cmpunordpd: i = 3; name = "cmpunord"; break;
- case X86::BI__builtin_ia32_cmpneqps:
- case X86::BI__builtin_ia32_cmpneqpd: i = 4; name = "cmpneq"; break;
- case X86::BI__builtin_ia32_cmpnltps:
- case X86::BI__builtin_ia32_cmpnltpd: i = 5; name = "cmpntl"; break;
- case X86::BI__builtin_ia32_cmpnleps:
- case X86::BI__builtin_ia32_cmpnlepd: i = 6; name = "cmpnle"; break;
- case X86::BI__builtin_ia32_cmpordps:
- case X86::BI__builtin_ia32_cmpordpd: i = 7; name = "cmpord"; break;
- case X86::BI__builtin_ia32_cmpgtps:
- case X86::BI__builtin_ia32_cmpgtpd:
- ShouldSwap = true;
- i = 1;
- name = "cmpgt";
- break;
- case X86::BI__builtin_ia32_cmpgeps:
- case X86::BI__builtin_ia32_cmpgepd:
- i = 2;
- name = "cmpge";
- ShouldSwap = true;
- break;
- case X86::BI__builtin_ia32_cmpngtps:
- case X86::BI__builtin_ia32_cmpngtpd:
- i = 5;
- name = "cmpngt";
- ShouldSwap = true;
- break;
- case X86::BI__builtin_ia32_cmpngeps:
- case X86::BI__builtin_ia32_cmpngepd:
- i = 6;
- name = "cmpnge";
- ShouldSwap = true;
- break;
- }
-
- if (ShouldSwap)
- std::swap(Ops[0], Ops[1]);
-
- llvm::Function *F;
- if (cast<llvm::VectorType>(Ops[0]->getType())->getElementType() ==
- llvm::Type::FloatTy)
- F = CGM.getIntrinsic(Intrinsic::x86_sse_cmp_ps);
- else
- F = CGM.getIntrinsic(Intrinsic::x86_sse2_cmp_pd);
-
- Ops.push_back(llvm::ConstantInt::get(llvm::Type::Int8Ty, i));
- return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size(), name);
+ case X86::BI__builtin_ia32_cmppd: {
+ llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse2_cmp_pd);
+ return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size(), "cmppd");
+ }
+ case X86::BI__builtin_ia32_cmpsd: {
+ llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse2_cmp_sd);
+ return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size(), "cmpsd");
}
case X86::BI__builtin_ia32_movss:
return EmitShuffleVector(Ops[0], Ops[1], 4, 1, 2, 3, "movss");
static inline __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpeq_pd(__m128d a, __m128d b)
{
- return (__m128d)__builtin_ia32_cmpeqpd(a, b);
+ return (__m128d)__builtin_ia32_cmppd(a, b, 0); /* predicate 0 = EQ */
}
static inline __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmplt_pd(__m128d a, __m128d b)
{
- return (__m128d)__builtin_ia32_cmpltpd(a, b);
+ return (__m128d)__builtin_ia32_cmppd(a, b, 1); /* predicate 1 = LT */
}
static inline __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmple_pd(__m128d a, __m128d b)
{
- return (__m128d)__builtin_ia32_cmplepd(a, b);
+ return (__m128d)__builtin_ia32_cmppd(a, b, 2); /* predicate 2 = LE */
}
static inline __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpgt_pd(__m128d a, __m128d b)
{
- return (__m128d)__builtin_ia32_cmpltpd(b, a);
+ return (__m128d)__builtin_ia32_cmppd(b, a, 1); /* a > b evaluated as b < a: predicate 1 = LT, operands swapped */
}
static inline __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpge_pd(__m128d a, __m128d b)
{
- return (__m128d)__builtin_ia32_cmplepd(b, a);
+ return (__m128d)__builtin_ia32_cmppd(b, a, 2); /* a >= b evaluated as b <= a: predicate 2 = LE, operands swapped */
}
static inline __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpord_pd(__m128d a, __m128d b)
{
- return (__m128d)__builtin_ia32_cmpordpd(a, b);
+ return (__m128d)__builtin_ia32_cmppd(a, b, 7); /* predicate 7 = ORD */
}
static inline __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpunord_pd(__m128d a, __m128d b)
{
- return (__m128d)__builtin_ia32_cmpunordpd(a, b);
+ return (__m128d)__builtin_ia32_cmppd(a, b, 3); /* predicate 3 = UNORD */
}
static inline __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpneq_pd(__m128d a, __m128d b)
{
- return (__m128d)__builtin_ia32_cmpneqpd(a, b);
+ return (__m128d)__builtin_ia32_cmppd(a, b, 4); /* predicate 4 = NEQ */
}
static inline __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpnlt_pd(__m128d a, __m128d b)
{
- return (__m128d)__builtin_ia32_cmpnltpd(a, b);
+ return (__m128d)__builtin_ia32_cmppd(a, b, 5); /* predicate 5 = NLT */
}
static inline __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpnle_pd(__m128d a, __m128d b)
{
- return (__m128d)__builtin_ia32_cmpnlepd(a, b);
+ return (__m128d)__builtin_ia32_cmppd(a, b, 6); /* predicate 6 = NLE */
}
static inline __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpngt_pd(__m128d a, __m128d b)
{
- return (__m128d)__builtin_ia32_cmpnltpd(b, a);
+ return (__m128d)__builtin_ia32_cmppd(b, a, 5); /* !(a > b) evaluated as !(b < a): predicate 5 = NLT, operands swapped */
}
static inline __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpnge_pd(__m128d a, __m128d b)
{
- return (__m128d)__builtin_ia32_cmpnlepd(b, a);
+ return (__m128d)__builtin_ia32_cmppd(b, a, 6); /* !(a >= b) evaluated as !(b <= a): predicate 6 = NLE, operands swapped */
}
static inline __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpeq_sd(__m128d a, __m128d b)
{
- return (__m128d)__builtin_ia32_cmpeqsd(a, b);
+ return (__m128d)__builtin_ia32_cmpsd(a, b, 0); /* predicate 0 = EQ */
}
static inline __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmplt_sd(__m128d a, __m128d b)
{
- return (__m128d)__builtin_ia32_cmpltsd(a, b);
+ return (__m128d)__builtin_ia32_cmpsd(a, b, 1); /* predicate 1 = LT */
}
static inline __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmple_sd(__m128d a, __m128d b)
{
- return (__m128d)__builtin_ia32_cmplesd(a, b);
+ return (__m128d)__builtin_ia32_cmpsd(a, b, 2); /* predicate 2 = LE */
}
static inline __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpgt_sd(__m128d a, __m128d b)
{
- return (__m128d)__builtin_ia32_cmpltsd(b, a);
+ /* a > b is evaluated as b < a (predicate 1 = LT, operands swapped).
+  * The scalar compare passes the upper element through from its FIRST
+  * operand, which after the swap is b; rebuild the result so the upper
+  * element comes from a, as the intrinsic's contract requires. */
+ __m128d c = (__m128d)__builtin_ia32_cmpsd(b, a, 1);
+ return (__m128d){ c[0], a[1] };
}
static inline __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpge_sd(__m128d a, __m128d b)
{
- return (__m128d)__builtin_ia32_cmplesd(b, a);
+ /* a >= b is evaluated as b <= a (predicate 2 = LE, operands swapped).
+  * The scalar compare passes the upper element through from its FIRST
+  * operand, which after the swap is b; rebuild the result so the upper
+  * element comes from a, as the intrinsic's contract requires. */
+ __m128d c = (__m128d)__builtin_ia32_cmpsd(b, a, 2);
+ return (__m128d){ c[0], a[1] };
}
static inline __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpord_sd(__m128d a, __m128d b)
{
- return (__m128d)__builtin_ia32_cmpordsd(a, b);
+ return (__m128d)__builtin_ia32_cmpsd(a, b, 7); /* predicate 7 = ORD */
}
static inline __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpunord_sd(__m128d a, __m128d b)
{
- return (__m128d)__builtin_ia32_cmpunordsd(a, b);
+ return (__m128d)__builtin_ia32_cmpsd(a, b, 3); /* predicate 3 = UNORD */
}
static inline __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpneq_sd(__m128d a, __m128d b)
{
- return (__m128d)__builtin_ia32_cmpneqsd(a, b);
+ return (__m128d)__builtin_ia32_cmpsd(a, b, 4); /* predicate 4 = NEQ */
}
static inline __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpnlt_sd(__m128d a, __m128d b)
{
- return (__m128d)__builtin_ia32_cmpnltsd(a, b);
+ return (__m128d)__builtin_ia32_cmpsd(a, b, 5); /* predicate 5 = NLT */
}
static inline __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpnle_sd(__m128d a, __m128d b)
{
- return (__m128d)__builtin_ia32_cmpnlesd(a, b);
+ return (__m128d)__builtin_ia32_cmpsd(a, b, 6); /* predicate 6 = NLE */
}
static inline __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpngt_sd(__m128d a, __m128d b)
{
- return (__m128d)__builtin_ia32_cmpnltsd(b, a);
+ /* !(a > b) is evaluated as !(b < a) (predicate 5 = NLT, operands swapped).
+  * The scalar compare passes the upper element through from its FIRST
+  * operand, which after the swap is b; rebuild the result so the upper
+  * element comes from a, as the intrinsic's contract requires. */
+ __m128d c = (__m128d)__builtin_ia32_cmpsd(b, a, 5);
+ return (__m128d){ c[0], a[1] };
}
static inline __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpnge_sd(__m128d a, __m128d b)
{
- return (__m128d)__builtin_ia32_cmpnlesd(b, a);
+ /* !(a >= b) is evaluated as !(b <= a) (predicate 6 = NLE, operands swapped).
+  * The scalar compare passes the upper element through from its FIRST
+  * operand, which after the swap is b; rebuild the result so the upper
+  * element comes from a, as the intrinsic's contract requires. */
+ __m128d c = (__m128d)__builtin_ia32_cmpsd(b, a, 6);
+ return (__m128d){ c[0], a[1] };
}
static inline int __attribute__((__always_inline__, __nodebug__))
static inline __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmpeq_ss(__m128 a, __m128 b)
{
- return (__m128)__builtin_ia32_cmpeqss(a, b);
+ return (__m128)__builtin_ia32_cmpss(a, b, 0); /* predicate 0 = EQ */
}
static inline __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmpeq_ps(__m128 a, __m128 b)
{
- return (__m128)__builtin_ia32_cmpeqps(a, b);
+ return (__m128)__builtin_ia32_cmpps(a, b, 0); /* predicate 0 = EQ */
}
static inline __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmplt_ss(__m128 a, __m128 b)
{
- return (__m128)__builtin_ia32_cmpltss(a, b);
+ return (__m128)__builtin_ia32_cmpss(a, b, 1); /* predicate 1 = LT */
}
static inline __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmplt_ps(__m128 a, __m128 b)
{
- return (__m128)__builtin_ia32_cmpltps(a, b);
+ return (__m128)__builtin_ia32_cmpps(a, b, 1); /* predicate 1 = LT */
}
static inline __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmple_ss(__m128 a, __m128 b)
{
- return (__m128)__builtin_ia32_cmpless(a, b);
+ return (__m128)__builtin_ia32_cmpss(a, b, 2); /* predicate 2 = LE */
}
static inline __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmple_ps(__m128 a, __m128 b)
{
- return (__m128)__builtin_ia32_cmpleps(a, b);
+ return (__m128)__builtin_ia32_cmpps(a, b, 2); /* predicate 2 = LE */
}
static inline __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmpgt_ss(__m128 a, __m128 b)
{
- return (__m128)__builtin_ia32_cmpltss(b, a);
+ /* a > b is evaluated as b < a (predicate 1 = LT, operands swapped).
+  * The scalar compare passes the upper three elements through from its
+  * FIRST operand, which after the swap is b. movss(a, c) yields
+  * { c[0], a[1], a[2], a[3] }, restoring a's upper elements as the
+  * intrinsic's contract requires. */
+ return (__m128)__builtin_ia32_movss(a, __builtin_ia32_cmpss(b, a, 1));
}
static inline __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmpgt_ps(__m128 a, __m128 b)
{
- return (__m128)__builtin_ia32_cmpltps(b, a);
+ return (__m128)__builtin_ia32_cmpps(b, a, 1); /* a > b evaluated as b < a: predicate 1 = LT, operands swapped */
}
static inline __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmpge_ss(__m128 a, __m128 b)
{
- return (__m128)__builtin_ia32_cmpless(b, a);
+ /* a >= b is evaluated as b <= a (predicate 2 = LE, operands swapped).
+  * The scalar compare passes the upper three elements through from its
+  * FIRST operand, which after the swap is b. movss(a, c) yields
+  * { c[0], a[1], a[2], a[3] }, restoring a's upper elements as the
+  * intrinsic's contract requires. */
+ return (__m128)__builtin_ia32_movss(a, __builtin_ia32_cmpss(b, a, 2));
}
static inline __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmpge_ps(__m128 a, __m128 b)
{
- return (__m128)__builtin_ia32_cmpleps(b, a);
+ return (__m128)__builtin_ia32_cmpps(b, a, 2); /* a >= b evaluated as b <= a: predicate 2 = LE, operands swapped */
}
static inline __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmpneq_ss(__m128 a, __m128 b)
{
- return (__m128)__builtin_ia32_cmpneqss(a, b);
+ return (__m128)__builtin_ia32_cmpss(a, b, 4); /* predicate 4 = NEQ */
}
static inline __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmpneq_ps(__m128 a, __m128 b)
{
- return (__m128)__builtin_ia32_cmpneqps(a, b);
+ return (__m128)__builtin_ia32_cmpps(a, b, 4); /* predicate 4 = NEQ */
}
static inline __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmpnlt_ss(__m128 a, __m128 b)
{
- return (__m128)__builtin_ia32_cmpnltss(a, b);
+ return (__m128)__builtin_ia32_cmpss(a, b, 5); /* predicate 5 = NLT */
}
static inline __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmpnlt_ps(__m128 a, __m128 b)
{
- return (__m128)__builtin_ia32_cmpnltps(a, b);
+ return (__m128)__builtin_ia32_cmpps(a, b, 5); /* predicate 5 = NLT */
}
static inline __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmpnle_ss(__m128 a, __m128 b)
{
- return (__m128)__builtin_ia32_cmpnless(a, b);
+ return (__m128)__builtin_ia32_cmpss(a, b, 6); /* predicate 6 = NLE */
}
static inline __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmpnle_ps(__m128 a, __m128 b)
{
- return (__m128)__builtin_ia32_cmpnleps(a, b);
+ return (__m128)__builtin_ia32_cmpps(a, b, 6); /* predicate 6 = NLE */
}
static inline __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmpngt_ss(__m128 a, __m128 b)
{
- return (__m128)__builtin_ia32_cmpnltss(b, a);
+ /* !(a > b) is evaluated as !(b < a) (predicate 5 = NLT, operands swapped).
+  * The scalar compare passes the upper three elements through from its
+  * FIRST operand, which after the swap is b. movss(a, c) yields
+  * { c[0], a[1], a[2], a[3] }, restoring a's upper elements as the
+  * intrinsic's contract requires. */
+ return (__m128)__builtin_ia32_movss(a, __builtin_ia32_cmpss(b, a, 5));
}
static inline __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmpngt_ps(__m128 a, __m128 b)
{
- return (__m128)__builtin_ia32_cmpnltps(b, a);
+ return (__m128)__builtin_ia32_cmpps(b, a, 5); /* !(a > b) evaluated as !(b < a): predicate 5 = NLT, operands swapped */
}
static inline __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmpnge_ss(__m128 a, __m128 b)
{
- return (__m128)__builtin_ia32_cmpnless(b, a);
+ /* !(a >= b) is evaluated as !(b <= a) (predicate 6 = NLE, operands swapped).
+  * The scalar compare passes the upper three elements through from its
+  * FIRST operand, which after the swap is b. movss(a, c) yields
+  * { c[0], a[1], a[2], a[3] }, restoring a's upper elements as the
+  * intrinsic's contract requires. */
+ return (__m128)__builtin_ia32_movss(a, __builtin_ia32_cmpss(b, a, 6));
}
static inline __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmpnge_ps(__m128 a, __m128 b)
{
- return (__m128)__builtin_ia32_cmpnleps(b, a);
+ return (__m128)__builtin_ia32_cmpps(b, a, 6); /* !(a >= b) evaluated as !(b <= a): predicate 6 = NLE, operands swapped */
}
static inline __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmpord_ss(__m128 a, __m128 b)
{
- return (__m128)__builtin_ia32_cmpordss(a, b);
+ return (__m128)__builtin_ia32_cmpss(a, b, 7); /* predicate 7 = ORD */
}
static inline __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmpord_ps(__m128 a, __m128 b)
{
- return (__m128)__builtin_ia32_cmpordps(a, b);
+ return (__m128)__builtin_ia32_cmpps(a, b, 7); /* predicate 7 = ORD */
}
static inline __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmpunord_ss(__m128 a, __m128 b)
{
- return (__m128)__builtin_ia32_cmpunordss(a, b);
+ return (__m128)__builtin_ia32_cmpss(a, b, 3); /* predicate 3 = UNORD */
}
static inline __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmpunord_ps(__m128 a, __m128 b)
{
- return (__m128)__builtin_ia32_cmpunordps(a, b);
+ return (__m128)__builtin_ia32_cmpps(a, b, 3); /* predicate 3 = UNORD */
}
static inline int __attribute__((__always_inline__, __nodebug__))
tmp_V4f = __builtin_ia32_subss(tmp_V4f, tmp_V4f);
tmp_V4f = __builtin_ia32_mulss(tmp_V4f, tmp_V4f);
tmp_V4f = __builtin_ia32_divss(tmp_V4f, tmp_V4f);
- tmp_V4i = __builtin_ia32_cmpeqps(tmp_V4f, tmp_V4f);
- tmp_V4i = __builtin_ia32_cmpltps(tmp_V4f, tmp_V4f);
- tmp_V4i = __builtin_ia32_cmpleps(tmp_V4f, tmp_V4f);
- tmp_V4i = __builtin_ia32_cmpgtps(tmp_V4f, tmp_V4f);
- tmp_V4i = __builtin_ia32_cmpgeps(tmp_V4f, tmp_V4f);
- tmp_V4i = __builtin_ia32_cmpunordps(tmp_V4f, tmp_V4f);
- tmp_V4i = __builtin_ia32_cmpneqps(tmp_V4f, tmp_V4f);
- tmp_V4i = __builtin_ia32_cmpnltps(tmp_V4f, tmp_V4f);
- tmp_V4i = __builtin_ia32_cmpnleps(tmp_V4f, tmp_V4f);
- tmp_V4i = __builtin_ia32_cmpngtps(tmp_V4f, tmp_V4f);
- tmp_V4i = __builtin_ia32_cmpngeps(tmp_V4f, tmp_V4f);
- tmp_V4i = __builtin_ia32_cmpordps(tmp_V4f, tmp_V4f);
- tmp_V4i = __builtin_ia32_cmpeqss(tmp_V4f, tmp_V4f);
- tmp_V4i = __builtin_ia32_cmpltss(tmp_V4f, tmp_V4f);
- tmp_V4i = __builtin_ia32_cmpless(tmp_V4f, tmp_V4f);
- tmp_V4i = __builtin_ia32_cmpunordss(tmp_V4f, tmp_V4f);
- tmp_V4i = __builtin_ia32_cmpneqss(tmp_V4f, tmp_V4f);
- tmp_V4i = __builtin_ia32_cmpnltss(tmp_V4f, tmp_V4f);
- tmp_V4i = __builtin_ia32_cmpnless(tmp_V4f, tmp_V4f);
-#ifdef USE_ALL
- tmp_V4i = __builtin_ia32_cmpngtss(tmp_V4f, tmp_V4f);
- tmp_V4i = __builtin_ia32_cmpngess(tmp_V4f, tmp_V4f);
-#endif
- tmp_V4i = __builtin_ia32_cmpordss(tmp_V4f, tmp_V4f);
+ tmp_V4f = __builtin_ia32_cmpps(tmp_V4f, tmp_V4f, 0);
+ tmp_V4f = __builtin_ia32_cmpps(tmp_V4f, tmp_V4f, 1);
+ tmp_V4f = __builtin_ia32_cmpps(tmp_V4f, tmp_V4f, 2);
+ tmp_V4f = __builtin_ia32_cmpps(tmp_V4f, tmp_V4f, 3);
+ tmp_V4f = __builtin_ia32_cmpps(tmp_V4f, tmp_V4f, 4);
+ tmp_V4f = __builtin_ia32_cmpps(tmp_V4f, tmp_V4f, 5);
+ tmp_V4f = __builtin_ia32_cmpps(tmp_V4f, tmp_V4f, 6);
+ tmp_V4f = __builtin_ia32_cmpps(tmp_V4f, tmp_V4f, 7);
+ tmp_V4f = __builtin_ia32_cmpss(tmp_V4f, tmp_V4f, 0);
+ tmp_V4f = __builtin_ia32_cmpss(tmp_V4f, tmp_V4f, 1);
+ tmp_V4f = __builtin_ia32_cmpss(tmp_V4f, tmp_V4f, 2);
+ tmp_V4f = __builtin_ia32_cmpss(tmp_V4f, tmp_V4f, 3);
+ tmp_V4f = __builtin_ia32_cmpss(tmp_V4f, tmp_V4f, 4);
+ tmp_V4f = __builtin_ia32_cmpss(tmp_V4f, tmp_V4f, 5);
+ tmp_V4f = __builtin_ia32_cmpss(tmp_V4f, tmp_V4f, 6);
+ tmp_V4f = __builtin_ia32_cmpss(tmp_V4f, tmp_V4f, 7);
tmp_V4f = __builtin_ia32_minps(tmp_V4f, tmp_V4f);
tmp_V4f = __builtin_ia32_maxps(tmp_V4f, tmp_V4f);
tmp_V4f = __builtin_ia32_minss(tmp_V4f, tmp_V4f);
tmp_V2d = __builtin_ia32_subsd(tmp_V2d, tmp_V2d);
tmp_V2d = __builtin_ia32_mulsd(tmp_V2d, tmp_V2d);
tmp_V2d = __builtin_ia32_divsd(tmp_V2d, tmp_V2d);
- tmp_V4i = __builtin_ia32_cmpeqpd(tmp_V2d, tmp_V2d);
- tmp_V4i = __builtin_ia32_cmpltpd(tmp_V2d, tmp_V2d);
- tmp_V4i = __builtin_ia32_cmplepd(tmp_V2d, tmp_V2d);
- tmp_V4i = __builtin_ia32_cmpgtpd(tmp_V2d, tmp_V2d);
- tmp_V4i = __builtin_ia32_cmpgepd(tmp_V2d, tmp_V2d);
- tmp_V4i = __builtin_ia32_cmpunordpd(tmp_V2d, tmp_V2d);
- tmp_V4i = __builtin_ia32_cmpneqpd(tmp_V2d, tmp_V2d);
- tmp_V4i = __builtin_ia32_cmpnltpd(tmp_V2d, tmp_V2d);
- tmp_V4i = __builtin_ia32_cmpnlepd(tmp_V2d, tmp_V2d);
- tmp_V4i = __builtin_ia32_cmpngtpd(tmp_V2d, tmp_V2d);
- tmp_V4i = __builtin_ia32_cmpngepd(tmp_V2d, tmp_V2d);
- tmp_V4i = __builtin_ia32_cmpordpd(tmp_V2d, tmp_V2d);
- tmp_V4i = __builtin_ia32_cmpeqsd(tmp_V2d, tmp_V2d);
- tmp_V4i = __builtin_ia32_cmpltsd(tmp_V2d, tmp_V2d);
- tmp_V4i = __builtin_ia32_cmplesd(tmp_V2d, tmp_V2d);
- tmp_V4i = __builtin_ia32_cmpunordsd(tmp_V2d, tmp_V2d);
- tmp_V4i = __builtin_ia32_cmpneqsd(tmp_V2d, tmp_V2d);
- tmp_V4i = __builtin_ia32_cmpnltsd(tmp_V2d, tmp_V2d);
- tmp_V4i = __builtin_ia32_cmpnlesd(tmp_V2d, tmp_V2d);
- tmp_V4i = __builtin_ia32_cmpordsd(tmp_V2d, tmp_V2d);
+ tmp_V2d = __builtin_ia32_cmppd(tmp_V2d, tmp_V2d, 0);
+ tmp_V2d = __builtin_ia32_cmppd(tmp_V2d, tmp_V2d, 1);
+ tmp_V2d = __builtin_ia32_cmppd(tmp_V2d, tmp_V2d, 2);
+ tmp_V2d = __builtin_ia32_cmppd(tmp_V2d, tmp_V2d, 3);
+ tmp_V2d = __builtin_ia32_cmppd(tmp_V2d, tmp_V2d, 4);
+ tmp_V2d = __builtin_ia32_cmppd(tmp_V2d, tmp_V2d, 5);
+ tmp_V2d = __builtin_ia32_cmppd(tmp_V2d, tmp_V2d, 6);
+ tmp_V2d = __builtin_ia32_cmppd(tmp_V2d, tmp_V2d, 7);
+ tmp_V2d = __builtin_ia32_cmpsd(tmp_V2d, tmp_V2d, 0);
+ tmp_V2d = __builtin_ia32_cmpsd(tmp_V2d, tmp_V2d, 1);
+ tmp_V2d = __builtin_ia32_cmpsd(tmp_V2d, tmp_V2d, 2);
+ tmp_V2d = __builtin_ia32_cmpsd(tmp_V2d, tmp_V2d, 3);
+ tmp_V2d = __builtin_ia32_cmpsd(tmp_V2d, tmp_V2d, 4);
+ tmp_V2d = __builtin_ia32_cmpsd(tmp_V2d, tmp_V2d, 5);
+ tmp_V2d = __builtin_ia32_cmpsd(tmp_V2d, tmp_V2d, 6);
+ tmp_V2d = __builtin_ia32_cmpsd(tmp_V2d, tmp_V2d, 7);
tmp_V2d = __builtin_ia32_minpd(tmp_V2d, tmp_V2d);
tmp_V2d = __builtin_ia32_maxpd(tmp_V2d, tmp_V2d);
tmp_V2d = __builtin_ia32_minsd(tmp_V2d, tmp_V2d);