]> granicus.if.org Git - clang/commitdiff
[x86] Add the (v)cmpps/pd/ss/sd builtins to match gcc. Use them in the sse intrinsic...
authorCraig Topper <craig.topper@gmail.com>
Sat, 27 Dec 2014 06:59:57 +0000 (06:59 +0000)
committerCraig Topper <craig.topper@gmail.com>
Sat, 27 Dec 2014 06:59:57 +0000 (06:59 +0000)
This still lower to the same intrinsics as before.

This is preparation for bounds checking the immediate on the avx version of the builtin so we don't pass illegal immediates into the backend. Since SSE uses a smaller size immediate its not possible to bounds check when using a shared builtin. Rather than creating a clang specific builtin for the different immediate, I decided (after consulting with Chandler) that it was better to match gcc.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@224879 91177308-0d34-0410-b5e6-96231b3b80d8

include/clang/Basic/BuiltinsX86.def
lib/CodeGen/CGBuiltin.cpp
lib/Headers/emmintrin.h
lib/Headers/xmmintrin.h
test/CodeGen/sse-builtins.c

index d32f34c536f7b87dcb663d3cf30d74b64024dd97..d0223772f472f74effb1e79a61a521a097a6a40f 100644 (file)
@@ -187,13 +187,45 @@ BUILTIN(__builtin_ia32_ucomisdgt, "iV2dV2d", "")
 BUILTIN(__builtin_ia32_ucomisdge, "iV2dV2d", "")
 BUILTIN(__builtin_ia32_ucomisdneq, "iV2dV2d", "")
 BUILTIN(__builtin_ia32_cmpps, "V4fV4fV4fIc", "")
+BUILTIN(__builtin_ia32_cmpeqps, "V4fV4fV4f", "")
+BUILTIN(__builtin_ia32_cmpltps, "V4fV4fV4f", "")
+BUILTIN(__builtin_ia32_cmpleps, "V4fV4fV4f", "")
+BUILTIN(__builtin_ia32_cmpunordps, "V4fV4fV4f", "")
+BUILTIN(__builtin_ia32_cmpneqps, "V4fV4fV4f", "")
+BUILTIN(__builtin_ia32_cmpnltps, "V4fV4fV4f", "")
+BUILTIN(__builtin_ia32_cmpnleps, "V4fV4fV4f", "")
+BUILTIN(__builtin_ia32_cmpordps, "V4fV4fV4f", "")
 BUILTIN(__builtin_ia32_cmpss, "V4fV4fV4fIc", "")
+BUILTIN(__builtin_ia32_cmpeqss, "V4fV4fV4f", "")
+BUILTIN(__builtin_ia32_cmpltss, "V4fV4fV4f", "")
+BUILTIN(__builtin_ia32_cmpless, "V4fV4fV4f", "")
+BUILTIN(__builtin_ia32_cmpunordss, "V4fV4fV4f", "")
+BUILTIN(__builtin_ia32_cmpneqss, "V4fV4fV4f", "")
+BUILTIN(__builtin_ia32_cmpnltss, "V4fV4fV4f", "")
+BUILTIN(__builtin_ia32_cmpnless, "V4fV4fV4f", "")
+BUILTIN(__builtin_ia32_cmpordss, "V4fV4fV4f", "")
 BUILTIN(__builtin_ia32_minps, "V4fV4fV4f", "")
 BUILTIN(__builtin_ia32_maxps, "V4fV4fV4f", "")
 BUILTIN(__builtin_ia32_minss, "V4fV4fV4f", "")
 BUILTIN(__builtin_ia32_maxss, "V4fV4fV4f", "")
 BUILTIN(__builtin_ia32_cmppd, "V2dV2dV2dIc", "")
+BUILTIN(__builtin_ia32_cmpeqpd, "V2dV2dV2d", "")
+BUILTIN(__builtin_ia32_cmpltpd, "V2dV2dV2d", "")
+BUILTIN(__builtin_ia32_cmplepd, "V2dV2dV2d", "")
+BUILTIN(__builtin_ia32_cmpunordpd, "V2dV2dV2d", "")
+BUILTIN(__builtin_ia32_cmpneqpd, "V2dV2dV2d", "")
+BUILTIN(__builtin_ia32_cmpnltpd, "V2dV2dV2d", "")
+BUILTIN(__builtin_ia32_cmpnlepd, "V2dV2dV2d", "")
+BUILTIN(__builtin_ia32_cmpordpd, "V2dV2dV2d", "")
 BUILTIN(__builtin_ia32_cmpsd, "V2dV2dV2dIc", "")
+BUILTIN(__builtin_ia32_cmpeqsd, "V2dV2dV2d", "")
+BUILTIN(__builtin_ia32_cmpltsd, "V2dV2dV2d", "")
+BUILTIN(__builtin_ia32_cmplesd, "V2dV2dV2d", "")
+BUILTIN(__builtin_ia32_cmpunordsd, "V2dV2dV2d", "")
+BUILTIN(__builtin_ia32_cmpneqsd, "V2dV2dV2d", "")
+BUILTIN(__builtin_ia32_cmpnltsd, "V2dV2dV2d", "")
+BUILTIN(__builtin_ia32_cmpnlesd, "V2dV2dV2d", "")
+BUILTIN(__builtin_ia32_cmpordsd, "V2dV2dV2d", "")
 BUILTIN(__builtin_ia32_minpd, "V2dV2dV2d", "")
 BUILTIN(__builtin_ia32_maxpd, "V2dV2dV2d", "")
 BUILTIN(__builtin_ia32_minsd, "V2dV2dV2d", "")
index fca60d50c5623bad647cd7bcfbd498103d48981a..1665460ee82be8e9b189bedbd52b9e768e857400 100644 (file)
@@ -5986,8 +5986,8 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
   // 3DNow!
   case X86::BI__builtin_ia32_pswapdsf:
   case X86::BI__builtin_ia32_pswapdsi: {
-    const char *name = nullptr;
-    Intrinsic::ID ID = Intrinsic::not_intrinsic;
+    const char *name;
+    Intrinsic::ID ID;
     switch(BuiltinID) {
     default: llvm_unreachable("Unsupported intrinsic!");
     case X86::BI__builtin_ia32_pswapdsf:
@@ -6041,6 +6041,154 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
     Value *F = CGM.getIntrinsic(Intrinsic::x86_avx2_vbroadcasti128);
     return Builder.CreateCall(F, Builder.CreateBitCast(VecTmp, Int8PtrTy));
   }
+  // SSE comparison intrisics
+  case X86::BI__builtin_ia32_cmpeqps:
+  case X86::BI__builtin_ia32_cmpltps:
+  case X86::BI__builtin_ia32_cmpleps:
+  case X86::BI__builtin_ia32_cmpunordps:
+  case X86::BI__builtin_ia32_cmpneqps:
+  case X86::BI__builtin_ia32_cmpnltps:
+  case X86::BI__builtin_ia32_cmpnleps:
+  case X86::BI__builtin_ia32_cmpordps:
+  case X86::BI__builtin_ia32_cmpeqss:
+  case X86::BI__builtin_ia32_cmpltss:
+  case X86::BI__builtin_ia32_cmpless:
+  case X86::BI__builtin_ia32_cmpunordss:
+  case X86::BI__builtin_ia32_cmpneqss:
+  case X86::BI__builtin_ia32_cmpnltss:
+  case X86::BI__builtin_ia32_cmpnless:
+  case X86::BI__builtin_ia32_cmpordss:
+  case X86::BI__builtin_ia32_cmpeqpd:
+  case X86::BI__builtin_ia32_cmpltpd:
+  case X86::BI__builtin_ia32_cmplepd:
+  case X86::BI__builtin_ia32_cmpunordpd:
+  case X86::BI__builtin_ia32_cmpneqpd:
+  case X86::BI__builtin_ia32_cmpnltpd:
+  case X86::BI__builtin_ia32_cmpnlepd:
+  case X86::BI__builtin_ia32_cmpordpd:
+  case X86::BI__builtin_ia32_cmpeqsd:
+  case X86::BI__builtin_ia32_cmpltsd:
+  case X86::BI__builtin_ia32_cmplesd:
+  case X86::BI__builtin_ia32_cmpunordsd:
+  case X86::BI__builtin_ia32_cmpneqsd:
+  case X86::BI__builtin_ia32_cmpnltsd:
+  case X86::BI__builtin_ia32_cmpnlesd:
+  case X86::BI__builtin_ia32_cmpordsd:
+    // These exist so that the builtin that takes an immediate can be bounds
+    // checked by clang to avoid passing bad immediates to the backend. Since
+    // AVX has a larger immediate than SSE we would need separate builtins to
+    // do the different bounds checking. Rather than create a clang specific
+    // SSE only builtin, this implements eight separate builtins to match gcc
+    // implementation.
+
+    // Choose the immediate.
+    unsigned Imm;
+    switch (BuiltinID) {
+    default: llvm_unreachable("Unsupported intrinsic!");
+    case X86::BI__builtin_ia32_cmpeqps:
+    case X86::BI__builtin_ia32_cmpeqss:
+    case X86::BI__builtin_ia32_cmpeqpd:
+    case X86::BI__builtin_ia32_cmpeqsd:
+      Imm = 0;
+      break;
+    case X86::BI__builtin_ia32_cmpltps:
+    case X86::BI__builtin_ia32_cmpltss:
+    case X86::BI__builtin_ia32_cmpltpd:
+    case X86::BI__builtin_ia32_cmpltsd:
+      Imm = 1;
+      break;
+    case X86::BI__builtin_ia32_cmpleps:
+    case X86::BI__builtin_ia32_cmpless:
+    case X86::BI__builtin_ia32_cmplepd:
+    case X86::BI__builtin_ia32_cmplesd:
+      Imm = 2;
+      break;
+    case X86::BI__builtin_ia32_cmpunordps:
+    case X86::BI__builtin_ia32_cmpunordss:
+    case X86::BI__builtin_ia32_cmpunordpd:
+    case X86::BI__builtin_ia32_cmpunordsd:
+      Imm = 3;
+      break;
+    case X86::BI__builtin_ia32_cmpneqps:
+    case X86::BI__builtin_ia32_cmpneqss:
+    case X86::BI__builtin_ia32_cmpneqpd:
+    case X86::BI__builtin_ia32_cmpneqsd:
+      Imm = 4;
+      break;
+    case X86::BI__builtin_ia32_cmpnltps:
+    case X86::BI__builtin_ia32_cmpnltss:
+    case X86::BI__builtin_ia32_cmpnltpd:
+    case X86::BI__builtin_ia32_cmpnltsd:
+      Imm = 5;
+      break;
+    case X86::BI__builtin_ia32_cmpnleps:
+    case X86::BI__builtin_ia32_cmpnless:
+    case X86::BI__builtin_ia32_cmpnlepd:
+    case X86::BI__builtin_ia32_cmpnlesd:
+      Imm = 6;
+      break;
+    case X86::BI__builtin_ia32_cmpordps:
+    case X86::BI__builtin_ia32_cmpordss:
+    case X86::BI__builtin_ia32_cmpordpd:
+    case X86::BI__builtin_ia32_cmpordsd:
+      Imm = 7;
+      break;
+    }
+
+    // Choose the intrinsic ID.
+    const char *name;
+    Intrinsic::ID ID;
+    switch (BuiltinID) {
+    default: llvm_unreachable("Unsupported intrinsic!");
+    case X86::BI__builtin_ia32_cmpeqps:
+    case X86::BI__builtin_ia32_cmpltps:
+    case X86::BI__builtin_ia32_cmpleps:
+    case X86::BI__builtin_ia32_cmpunordps:
+    case X86::BI__builtin_ia32_cmpneqps:
+    case X86::BI__builtin_ia32_cmpnltps:
+    case X86::BI__builtin_ia32_cmpnleps:
+    case X86::BI__builtin_ia32_cmpordps:
+      name = "cmpps";
+      ID = Intrinsic::x86_sse_cmp_ps;
+      break;
+    case X86::BI__builtin_ia32_cmpeqss:
+    case X86::BI__builtin_ia32_cmpltss:
+    case X86::BI__builtin_ia32_cmpless:
+    case X86::BI__builtin_ia32_cmpunordss:
+    case X86::BI__builtin_ia32_cmpneqss:
+    case X86::BI__builtin_ia32_cmpnltss:
+    case X86::BI__builtin_ia32_cmpnless:
+    case X86::BI__builtin_ia32_cmpordss:
+      name = "cmpss";
+      ID = Intrinsic::x86_sse_cmp_ss;
+      break;
+    case X86::BI__builtin_ia32_cmpeqpd:
+    case X86::BI__builtin_ia32_cmpltpd:
+    case X86::BI__builtin_ia32_cmplepd:
+    case X86::BI__builtin_ia32_cmpunordpd:
+    case X86::BI__builtin_ia32_cmpneqpd:
+    case X86::BI__builtin_ia32_cmpnltpd:
+    case X86::BI__builtin_ia32_cmpnlepd:
+    case X86::BI__builtin_ia32_cmpordpd:
+      name = "cmppd";
+      ID = Intrinsic::x86_sse2_cmp_pd;
+      break;
+    case X86::BI__builtin_ia32_cmpeqsd:
+    case X86::BI__builtin_ia32_cmpltsd:
+    case X86::BI__builtin_ia32_cmplesd:
+    case X86::BI__builtin_ia32_cmpunordsd:
+    case X86::BI__builtin_ia32_cmpneqsd:
+    case X86::BI__builtin_ia32_cmpnltsd:
+    case X86::BI__builtin_ia32_cmpnlesd:
+    case X86::BI__builtin_ia32_cmpordsd:
+      name = "cmpsd";
+      ID = Intrinsic::x86_sse2_cmp_sd;
+      break;
+    }
+
+    Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
+    llvm::Function *F = CGM.getIntrinsic(ID);
+    return Builder.CreateCall(F, Ops, name);
   }
 }
 
index b3f8569524b9833bf02d3938e561d1fd71bfae0f..28d004309cf443d8ecad926eae4fda94dfb9f0c6 100644 (file)
@@ -155,148 +155,148 @@ _mm_xor_pd(__m128d __a, __m128d __b)
 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
 _mm_cmpeq_pd(__m128d __a, __m128d __b)
 {
-  return (__m128d)__builtin_ia32_cmppd(__a, __b, 0);
+  return (__m128d)__builtin_ia32_cmpeqpd(__a, __b);
 }
 
 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
 _mm_cmplt_pd(__m128d __a, __m128d __b)
 {
-  return (__m128d)__builtin_ia32_cmppd(__a, __b, 1);
+  return (__m128d)__builtin_ia32_cmpltpd(__a, __b);
 }
 
 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
 _mm_cmple_pd(__m128d __a, __m128d __b)
 {
-  return (__m128d)__builtin_ia32_cmppd(__a, __b, 2);
+  return (__m128d)__builtin_ia32_cmplepd(__a, __b);
 }
 
 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
 _mm_cmpgt_pd(__m128d __a, __m128d __b)
 {
-  return (__m128d)__builtin_ia32_cmppd(__b, __a, 1);
+  return (__m128d)__builtin_ia32_cmpltpd(__b, __a);
 }
 
 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
 _mm_cmpge_pd(__m128d __a, __m128d __b)
 {
-  return (__m128d)__builtin_ia32_cmppd(__b, __a, 2);
+  return (__m128d)__builtin_ia32_cmplepd(__b, __a);
 }
 
 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
 _mm_cmpord_pd(__m128d __a, __m128d __b)
 {
-  return (__m128d)__builtin_ia32_cmppd(__a, __b, 7);
+  return (__m128d)__builtin_ia32_cmpordpd(__a, __b);
 }
 
 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
 _mm_cmpunord_pd(__m128d __a, __m128d __b)
 {
-  return (__m128d)__builtin_ia32_cmppd(__a, __b, 3);
+  return (__m128d)__builtin_ia32_cmpunordpd(__a, __b);
 }
 
 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
 _mm_cmpneq_pd(__m128d __a, __m128d __b)
 {
-  return (__m128d)__builtin_ia32_cmppd(__a, __b, 4);
+  return (__m128d)__builtin_ia32_cmpneqpd(__a, __b);
 }
 
 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
 _mm_cmpnlt_pd(__m128d __a, __m128d __b)
 {
-  return (__m128d)__builtin_ia32_cmppd(__a, __b, 5);
+  return (__m128d)__builtin_ia32_cmpnltpd(__a, __b);
 }
 
 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
 _mm_cmpnle_pd(__m128d __a, __m128d __b)
 {
-  return (__m128d)__builtin_ia32_cmppd(__a, __b, 6);
+  return (__m128d)__builtin_ia32_cmpnlepd(__a, __b);
 }
 
 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
 _mm_cmpngt_pd(__m128d __a, __m128d __b)
 {
-  return (__m128d)__builtin_ia32_cmppd(__b, __a, 5);
+  return (__m128d)__builtin_ia32_cmpnltpd(__b, __a);
 }
 
 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
 _mm_cmpnge_pd(__m128d __a, __m128d __b)
 {
-  return (__m128d)__builtin_ia32_cmppd(__b, __a, 6);
+  return (__m128d)__builtin_ia32_cmpnlepd(__b, __a);
 }
 
 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
 _mm_cmpeq_sd(__m128d __a, __m128d __b)
 {
-  return (__m128d)__builtin_ia32_cmpsd(__a, __b, 0);
+  return (__m128d)__builtin_ia32_cmpeqsd(__a, __b);
 }
 
 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
 _mm_cmplt_sd(__m128d __a, __m128d __b)
 {
-  return (__m128d)__builtin_ia32_cmpsd(__a, __b, 1);
+  return (__m128d)__builtin_ia32_cmpltsd(__a, __b);
 }
 
 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
 _mm_cmple_sd(__m128d __a, __m128d __b)
 {
-  return (__m128d)__builtin_ia32_cmpsd(__a, __b, 2);
+  return (__m128d)__builtin_ia32_cmplesd(__a, __b);
 }
 
 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
 _mm_cmpgt_sd(__m128d __a, __m128d __b)
 {
-  __m128d __c = __builtin_ia32_cmpsd(__b, __a, 1);
+  __m128d __c = __builtin_ia32_cmpltsd(__b, __a);
   return (__m128d) { __c[0], __a[1] };
 }
 
 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
 _mm_cmpge_sd(__m128d __a, __m128d __b)
 {
-  __m128d __c = __builtin_ia32_cmpsd(__b, __a, 2);
+  __m128d __c = __builtin_ia32_cmplesd(__b, __a);
   return (__m128d) { __c[0], __a[1] };
 }
 
 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
 _mm_cmpord_sd(__m128d __a, __m128d __b)
 {
-  return (__m128d)__builtin_ia32_cmpsd(__a, __b, 7);
+  return (__m128d)__builtin_ia32_cmpordsd(__a, __b);
 }
 
 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
 _mm_cmpunord_sd(__m128d __a, __m128d __b)
 {
-  return (__m128d)__builtin_ia32_cmpsd(__a, __b, 3);
+  return (__m128d)__builtin_ia32_cmpunordsd(__a, __b);
 }
 
 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
 _mm_cmpneq_sd(__m128d __a, __m128d __b)
 {
-  return (__m128d)__builtin_ia32_cmpsd(__a, __b, 4);
+  return (__m128d)__builtin_ia32_cmpneqsd(__a, __b);
 }
 
 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
 _mm_cmpnlt_sd(__m128d __a, __m128d __b)
 {
-  return (__m128d)__builtin_ia32_cmpsd(__a, __b, 5);
+  return (__m128d)__builtin_ia32_cmpnltsd(__a, __b);
 }
 
 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
 _mm_cmpnle_sd(__m128d __a, __m128d __b)
 {
-  return (__m128d)__builtin_ia32_cmpsd(__a, __b, 6);
+  return (__m128d)__builtin_ia32_cmpnlesd(__a, __b);
 }
 
 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
 _mm_cmpngt_sd(__m128d __a, __m128d __b)
 {
-  __m128d __c = __builtin_ia32_cmpsd(__b, __a, 5);
+  __m128d __c = __builtin_ia32_cmpnltsd(__b, __a);
   return (__m128d) { __c[0], __a[1] };
 }
 
 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
 _mm_cmpnge_sd(__m128d __a, __m128d __b)
 {
-  __m128d __c = __builtin_ia32_cmpsd(__b, __a, 6);
+  __m128d __c = __builtin_ia32_cmpnlesd(__b, __a);
   return (__m128d) { __c[0], __a[1] };
 }
 
index c9befcb456f68303a688b64a5c7a78e82fe0a7de..d1afe81601c326234979d8a27e021978367fea82 100644 (file)
@@ -182,153 +182,153 @@ _mm_xor_ps(__m128 __a, __m128 __b)
 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
 _mm_cmpeq_ss(__m128 __a, __m128 __b)
 {
-  return (__m128)__builtin_ia32_cmpss(__a, __b, 0);
+  return (__m128)__builtin_ia32_cmpeqss(__a, __b);
 }
 
 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
 _mm_cmpeq_ps(__m128 __a, __m128 __b)
 {
-  return (__m128)__builtin_ia32_cmpps(__a, __b, 0);
+  return (__m128)__builtin_ia32_cmpeqps(__a, __b);
 }
 
 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
 _mm_cmplt_ss(__m128 __a, __m128 __b)
 {
-  return (__m128)__builtin_ia32_cmpss(__a, __b, 1);
+  return (__m128)__builtin_ia32_cmpltss(__a, __b);
 }
 
 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
 _mm_cmplt_ps(__m128 __a, __m128 __b)
 {
-  return (__m128)__builtin_ia32_cmpps(__a, __b, 1);
+  return (__m128)__builtin_ia32_cmpltps(__a, __b);
 }
 
 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
 _mm_cmple_ss(__m128 __a, __m128 __b)
 {
-  return (__m128)__builtin_ia32_cmpss(__a, __b, 2);
+  return (__m128)__builtin_ia32_cmpless(__a, __b);
 }
 
 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
 _mm_cmple_ps(__m128 __a, __m128 __b)
 {
-  return (__m128)__builtin_ia32_cmpps(__a, __b, 2);
+  return (__m128)__builtin_ia32_cmpleps(__a, __b);
 }
 
 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
 _mm_cmpgt_ss(__m128 __a, __m128 __b)
 {
   return (__m128)__builtin_shufflevector(__a,
-                                         __builtin_ia32_cmpss(__b, __a, 1),
+                                         __builtin_ia32_cmpltss(__b, __a),
                                          4, 1, 2, 3);
 }
 
 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
 _mm_cmpgt_ps(__m128 __a, __m128 __b)
 {
-  return (__m128)__builtin_ia32_cmpps(__b, __a, 1);
+  return (__m128)__builtin_ia32_cmpltps(__b, __a);
 }
 
 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
 _mm_cmpge_ss(__m128 __a, __m128 __b)
 {
   return (__m128)__builtin_shufflevector(__a,
-                                         __builtin_ia32_cmpss(__b, __a, 2),
+                                         __builtin_ia32_cmpless(__b, __a),
                                          4, 1, 2, 3);
 }
 
 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
 _mm_cmpge_ps(__m128 __a, __m128 __b)
 {
-  return (__m128)__builtin_ia32_cmpps(__b, __a, 2);
+  return (__m128)__builtin_ia32_cmpleps(__b, __a);
 }
 
 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
 _mm_cmpneq_ss(__m128 __a, __m128 __b)
 {
-  return (__m128)__builtin_ia32_cmpss(__a, __b, 4);
+  return (__m128)__builtin_ia32_cmpneqss(__a, __b);
 }
 
 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
 _mm_cmpneq_ps(__m128 __a, __m128 __b)
 {
-  return (__m128)__builtin_ia32_cmpps(__a, __b, 4);
+  return (__m128)__builtin_ia32_cmpneqps(__a, __b);
 }
 
 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
 _mm_cmpnlt_ss(__m128 __a, __m128 __b)
 {
-  return (__m128)__builtin_ia32_cmpss(__a, __b, 5);
+  return (__m128)__builtin_ia32_cmpnltss(__a, __b);
 }
 
 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
 _mm_cmpnlt_ps(__m128 __a, __m128 __b)
 {
-  return (__m128)__builtin_ia32_cmpps(__a, __b, 5);
+  return (__m128)__builtin_ia32_cmpnltps(__a, __b);
 }
 
 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
 _mm_cmpnle_ss(__m128 __a, __m128 __b)
 {
-  return (__m128)__builtin_ia32_cmpss(__a, __b, 6);
+  return (__m128)__builtin_ia32_cmpnless(__a, __b);
 }
 
 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
 _mm_cmpnle_ps(__m128 __a, __m128 __b)
 {
-  return (__m128)__builtin_ia32_cmpps(__a, __b, 6);
+  return (__m128)__builtin_ia32_cmpnleps(__a, __b);
 }
 
 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
 _mm_cmpngt_ss(__m128 __a, __m128 __b)
 {
   return (__m128)__builtin_shufflevector(__a,
-                                         __builtin_ia32_cmpss(__b, __a, 5),
+                                         __builtin_ia32_cmpnltss(__b, __a),
                                          4, 1, 2, 3);
 }
 
 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
 _mm_cmpngt_ps(__m128 __a, __m128 __b)
 {
-  return (__m128)__builtin_ia32_cmpps(__b, __a, 5);
+  return (__m128)__builtin_ia32_cmpnltps(__b, __a);
 }
 
 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
 _mm_cmpnge_ss(__m128 __a, __m128 __b)
 {
   return (__m128)__builtin_shufflevector(__a,
-                                         __builtin_ia32_cmpss(__b, __a, 6),
+                                         __builtin_ia32_cmpnless(__b, __a),
                                          4, 1, 2, 3);
 }
 
 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
 _mm_cmpnge_ps(__m128 __a, __m128 __b)
 {
-  return (__m128)__builtin_ia32_cmpps(__b, __a, 6);
+  return (__m128)__builtin_ia32_cmpnleps(__b, __a);
 }
 
 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
 _mm_cmpord_ss(__m128 __a, __m128 __b)
 {
-  return (__m128)__builtin_ia32_cmpss(__a, __b, 7);
+  return (__m128)__builtin_ia32_cmpordss(__a, __b);
 }
 
 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
 _mm_cmpord_ps(__m128 __a, __m128 __b)
 {
-  return (__m128)__builtin_ia32_cmpps(__a, __b, 7);
+  return (__m128)__builtin_ia32_cmpordps(__a, __b);
 }
 
 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
 _mm_cmpunord_ss(__m128 __a, __m128 __b)
 {
-  return (__m128)__builtin_ia32_cmpss(__a, __b, 3);
+  return (__m128)__builtin_ia32_cmpunordss(__a, __b);
 }
 
 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
 _mm_cmpunord_ps(__m128 __a, __m128 __b)
 {
-  return (__m128)__builtin_ia32_cmpps(__a, __b, 3);
+  return (__m128)__builtin_ia32_cmpunordps(__a, __b);
 }
 
 static __inline__ int __attribute__((__always_inline__, __nodebug__))
index 238454bb92e94a2cc074bd6ec59df805b596a965..34c2de7d331bcb40dbb145e2639d1763d9a4cab6 100644 (file)
@@ -255,3 +255,291 @@ __m128i test_blend_epi16(__m128i V1, __m128i V2) {
   // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 7>
   return _mm_blend_epi16(V1, V2, 42);
 }
+
+__m128 test_mm_cmpeq_ss(__m128 __a, __m128 __b) {
+  // CHECK-LABEL: @test_mm_cmpeq_ss
+  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 0)
+  return _mm_cmpeq_ss(__a, __b);
+}
+
+__m128 test_mm_cmplt_ss(__m128 __a, __m128 __b) {
+  // CHECK-LABEL: @test_mm_cmplt_ss
+  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 1)
+  return _mm_cmplt_ss(__a, __b);
+}
+
+__m128 test_mm_cmple_ss(__m128 __a, __m128 __b) {
+  // CHECK-LABEL: @test_mm_cmple_ss
+  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 2)
+  return _mm_cmple_ss(__a, __b);
+}
+
+__m128 test_mm_cmpunord_ss(__m128 __a, __m128 __b) {
+  // CHECK-LABEL: @test_mm_cmpunord_ss
+  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 3)
+  return _mm_cmpunord_ss(__a, __b);
+}
+
+__m128 test_mm_cmpneq_ss(__m128 __a, __m128 __b) {
+  // CHECK-LABEL: @test_mm_cmpneq_ss
+  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 4)
+  return _mm_cmpneq_ss(__a, __b);
+}
+
+__m128 test_mm_cmpnlt_ss(__m128 __a, __m128 __b) {
+  // CHECK-LABEL: @test_mm_cmpnlt_ss
+  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 5)
+  return _mm_cmpnlt_ss(__a, __b);
+}
+
+__m128 test_mm_cmpnle_ss(__m128 __a, __m128 __b) {
+  // CHECK-LABEL: @test_mm_cmpnle_ss
+  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 6)
+  return _mm_cmpnle_ss(__a, __b);
+}
+
+__m128 test_mm_cmpord_ss(__m128 __a, __m128 __b) {
+  // CHECK-LABEL: @test_mm_cmpord_ss
+  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 7)
+  return _mm_cmpord_ss(__a, __b);
+}
+
+__m128 test_mm_cmpgt_ss(__m128 __a, __m128 __b) {
+  // CHECK-LABEL: @test_mm_cmpgt_ss
+  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 1)
+  return _mm_cmpgt_ss(__a, __b);
+}
+
+__m128 test_mm_cmpge_ss(__m128 __a, __m128 __b) {
+  // CHECK-LABEL: @test_mm_cmpge_ss
+  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 2)
+  return _mm_cmpge_ss(__a, __b);
+}
+
+__m128 test_mm_cmpngt_ss(__m128 __a, __m128 __b) {
+  // CHECK-LABEL: @test_mm_cmpngt_ss
+  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 5)
+  return _mm_cmpngt_ss(__a, __b);
+}
+
+__m128 test_mm_cmpnge_ss(__m128 __a, __m128 __b) {
+  // CHECK-LABEL: @test_mm_cmpnge_ss
+  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 6)
+  return _mm_cmpnge_ss(__a, __b);
+}
+
+__m128 test_mm_cmpeq_ps(__m128 __a, __m128 __b) {
+  // CHECK-LABEL: @test_mm_cmpeq_ps
+  // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 0)
+  return _mm_cmpeq_ps(__a, __b);
+}
+
+__m128 test_mm_cmplt_ps(__m128 __a, __m128 __b) {
+  // CHECK-LABEL: @test_mm_cmplt_ps
+  // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 1)
+  return _mm_cmplt_ps(__a, __b);
+}
+
+__m128 test_mm_cmple_ps(__m128 __a, __m128 __b) {
+  // CHECK-LABEL: @test_mm_cmple_ps
+  // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 2)
+  return _mm_cmple_ps(__a, __b);
+}
+
+__m128 test_mm_cmpunord_ps(__m128 __a, __m128 __b) {
+  // CHECK-LABEL: @test_mm_cmpunord_ps
+  // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 3)
+  return _mm_cmpunord_ps(__a, __b);
+}
+
+__m128 test_mm_cmpneq_ps(__m128 __a, __m128 __b) {
+  // CHECK-LABEL: @test_mm_cmpneq_ps
+  // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 4)
+  return _mm_cmpneq_ps(__a, __b);
+}
+
+__m128 test_mm_cmpnlt_ps(__m128 __a, __m128 __b) {
+  // CHECK-LABEL: @test_mm_cmpnlt_ps
+  // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 5)
+  return _mm_cmpnlt_ps(__a, __b);
+}
+
+__m128 test_mm_cmpnle_ps(__m128 __a, __m128 __b) {
+  // CHECK-LABEL: @test_mm_cmpnle_ps
+  // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 6)
+  return _mm_cmpnle_ps(__a, __b);
+}
+
+__m128 test_mm_cmpord_ps(__m128 __a, __m128 __b) {
+  // CHECK-LABEL: @test_mm_cmpord_ps
+  // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 7)
+  return _mm_cmpord_ps(__a, __b);
+}
+
+__m128 test_mm_cmpgt_ps(__m128 __a, __m128 __b) {
+  // CHECK-LABEL: @test_mm_cmpgt_ps
+  // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 1)
+  return _mm_cmpgt_ps(__a, __b);
+}
+
+__m128 test_mm_cmpge_ps(__m128 __a, __m128 __b) {
+  // CHECK-LABEL: @test_mm_cmpge_ps
+  // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 2)
+  return _mm_cmpge_ps(__a, __b);
+}
+
+__m128 test_mm_cmpngt_ps(__m128 __a, __m128 __b) {
+  // CHECK-LABEL: @test_mm_cmpngt_ps
+  // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 5)
+  return _mm_cmpngt_ps(__a, __b);
+}
+
+__m128 test_mm_cmpnge_ps(__m128 __a, __m128 __b) {
+  // CHECK-LABEL: @test_mm_cmpnge_ps
+  // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 6)
+  return _mm_cmpnge_ps(__a, __b);
+}
+
+__m128d test_mm_cmpeq_sd(__m128d __a, __m128d __b) {
+  // CHECK-LABEL: @test_mm_cmpeq_sd
+  // CHECK: @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 0)
+  return _mm_cmpeq_sd(__a, __b);
+}
+
+__m128d test_mm_cmplt_sd(__m128d __a, __m128d __b) {
+  // CHECK-LABEL: @test_mm_cmplt_sd
+  // CHECK: @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 1)
+  return _mm_cmplt_sd(__a, __b);
+}
+
+__m128d test_mm_cmple_sd(__m128d __a, __m128d __b) {
+  // CHECK-LABEL: @test_mm_cmple_sd
+  // CHECK: @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 2)
+  return _mm_cmple_sd(__a, __b);
+}
+
+__m128d test_mm_cmpunord_sd(__m128d __a, __m128d __b) {
+  // CHECK-LABEL: @test_mm_cmpunord_sd
+  // CHECK: @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 3)
+  return _mm_cmpunord_sd(__a, __b);
+}
+
+__m128d test_mm_cmpneq_sd(__m128d __a, __m128d __b) {
+  // CHECK-LABEL: @test_mm_cmpneq_sd
+  // CHECK: @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 4)
+  return _mm_cmpneq_sd(__a, __b);
+}
+
+__m128d test_mm_cmpnlt_sd(__m128d __a, __m128d __b) {
+  // CHECK-LABEL: @test_mm_cmpnlt_sd
+  // CHECK: @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 5)
+  return _mm_cmpnlt_sd(__a, __b);
+}
+
+__m128d test_mm_cmpnle_sd(__m128d __a, __m128d __b) {
+  // CHECK-LABEL: @test_mm_cmpnle_sd
+  // CHECK: @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 6)
+  return _mm_cmpnle_sd(__a, __b);
+}
+
+__m128d test_mm_cmpord_sd(__m128d __a, __m128d __b) {
+  // CHECK-LABEL: @test_mm_cmpord_sd
+  // CHECK: @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 7)
+  return _mm_cmpord_sd(__a, __b);
+}
+
+__m128d test_mm_cmpgt_sd(__m128d __a, __m128d __b) {
+  // CHECK-LABEL: @test_mm_cmpgt_sd
+  // CHECK: @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 1)
+  return _mm_cmpgt_sd(__a, __b);
+}
+
+__m128d test_mm_cmpge_sd(__m128d __a, __m128d __b) {
+  // CHECK-LABEL: @test_mm_cmpge_sd
+  // CHECK: @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 2)
+  return _mm_cmpge_sd(__a, __b);
+}
+
+__m128d test_mm_cmpngt_sd(__m128d __a, __m128d __b) {
+  // CHECK-LABEL: @test_mm_cmpngt_sd
+  // CHECK: @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 5)
+  return _mm_cmpngt_sd(__a, __b);
+}
+
+__m128d test_mm_cmpnge_sd(__m128d __a, __m128d __b) {
+  // CHECK-LABEL: @test_mm_cmpnge_sd
+  // CHECK: @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 6)
+  return _mm_cmpnge_sd(__a, __b);
+}
+
+__m128d test_mm_cmpeq_pd(__m128d __a, __m128d __b) {
+  // CHECK-LABEL: @test_mm_cmpeq_pd
+  // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 0)
+  return _mm_cmpeq_pd(__a, __b);
+}
+
+__m128d test_mm_cmplt_pd(__m128d __a, __m128d __b) {
+  // CHECK-LABEL: @test_mm_cmplt_pd
+  // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 1)
+  return _mm_cmplt_pd(__a, __b);
+}
+
+__m128d test_mm_cmple_pd(__m128d __a, __m128d __b) {
+  // CHECK-LABEL: @test_mm_cmple_pd
+  // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 2)
+  return _mm_cmple_pd(__a, __b);
+}
+
+__m128d test_mm_cmpunord_pd(__m128d __a, __m128d __b) {
+  // CHECK-LABEL: @test_mm_cmpunord_pd
+  // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 3)
+  return _mm_cmpunord_pd(__a, __b);
+}
+
+__m128d test_mm_cmpneq_pd(__m128d __a, __m128d __b) {
+  // CHECK-LABEL: @test_mm_cmpneq_pd
+  // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 4)
+  return _mm_cmpneq_pd(__a, __b);
+}
+
+__m128d test_mm_cmpnlt_pd(__m128d __a, __m128d __b) {
+  // CHECK-LABEL: @test_mm_cmpnlt_pd
+  // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 5)
+  return _mm_cmpnlt_pd(__a, __b);
+}
+
+__m128d test_mm_cmpnle_pd(__m128d __a, __m128d __b) {
+  // CHECK-LABEL: @test_mm_cmpnle_pd
+  // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 6)
+  return _mm_cmpnle_pd(__a, __b);
+}
+
+__m128d test_mm_cmpord_pd(__m128d __a, __m128d __b) {
+  // CHECK-LABEL: @test_mm_cmpord_pd
+  // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 7)
+  return _mm_cmpord_pd(__a, __b);
+}
+
+__m128d test_mm_cmpgt_pd(__m128d __a, __m128d __b) {
+  // CHECK-LABEL: @test_mm_cmpgt_pd
+  // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 1)
+  return _mm_cmpgt_pd(__a, __b);
+}
+
+__m128d test_mm_cmpge_pd(__m128d __a, __m128d __b) {
+  // CHECK-LABEL: @test_mm_cmpge_pd
+  // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 2)
+  return _mm_cmpge_pd(__a, __b);
+}
+
+__m128d test_mm_cmpngt_pd(__m128d __a, __m128d __b) {
+  // CHECK-LABEL: @test_mm_cmpngt_pd
+  // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 5)
+  return _mm_cmpngt_pd(__a, __b);
+}
+
+__m128d test_mm_cmpnge_pd(__m128d __a, __m128d __b) {
+  // CHECK-LABEL: @test_mm_cmpnge_pd
+  // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 6)
+  return _mm_cmpnge_pd(__a, __b);
+}