granicus.if.org Git - clang/commitdiff
LLVM doesn't always optimize away the four loads from this:
author Bill Wendling <isanbard@gmail.com>
Thu, 12 May 2011 19:02:15 +0000 (19:02 +0000)
committer Bill Wendling <isanbard@gmail.com>
Thu, 12 May 2011 19:02:15 +0000 (19:02 +0000)
     (__m128){ p[0], p[1], p[2], p[3] }

which produces really bad code. This could be done in instcombine, but it's
probably better to do it in the front-end instead.
<rdar://problem/9424836>

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@131237 91177308-0d34-0410-b5e6-96231b3b80d8

include/clang/Basic/BuiltinsX86.def
lib/CodeGen/CGBuiltin.cpp
lib/Headers/emmintrin.h
lib/Headers/xmmintrin.h
test/CodeGen/builtins-x86.c

index 2c2a84ab30ae12ec30f769ff5b9b65c585fd0a92..ce376dd3d71939cdd5a00b4139966025c2717824 100644 (file)
@@ -240,6 +240,7 @@ BUILTIN(__builtin_ia32_cvtps2pi, "V2iV4f", "")
 BUILTIN(__builtin_ia32_cvtss2si, "iV4f", "")
 BUILTIN(__builtin_ia32_cvtss2si64, "LLiV4f", "")
 BUILTIN(__builtin_ia32_cvttps2pi, "V2iV4f", "")
+BUILTIN(__builtin_ia32_loadups, "V4ffC*", "")
 BUILTIN(__builtin_ia32_storeups, "vf*V4f", "")
 BUILTIN(__builtin_ia32_storehps, "vV2i*V4f", "")
 BUILTIN(__builtin_ia32_storelps, "vV2i*V4f", "")
@@ -253,6 +254,7 @@ BUILTIN(__builtin_ia32_rsqrtss, "V4fV4f", "")
 BUILTIN(__builtin_ia32_sqrtps, "V4fV4f", "")
 BUILTIN(__builtin_ia32_sqrtss, "V4fV4f", "")
 BUILTIN(__builtin_ia32_maskmovdqu, "vV16cV16cc*", "")
+BUILTIN(__builtin_ia32_loadupd, "V2ddC*", "")
 BUILTIN(__builtin_ia32_storeupd, "vd*V2d", "")
 BUILTIN(__builtin_ia32_movmskpd, "iV2d", "")
 BUILTIN(__builtin_ia32_pmovmskb128, "iV16c", "")
index 46546177ae5c2ce57d8f8558d484130a104f222a..494dfaeff7759b5697bd98615381f766036a5514 100644 (file)
@@ -2143,6 +2143,8 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
     // If palignr is shifting the pair of vectors more than 32 bytes, emit zero.
     return llvm::Constant::getNullValue(ConvertType(E->getType()));
   }
+  case X86::BI__builtin_ia32_loadups:
+  case X86::BI__builtin_ia32_loadupd:
   case X86::BI__builtin_ia32_loaddqu: {
     const llvm::Type *VecTy = ConvertType(E->getType());
     const llvm::Type *IntTy = llvm::IntegerType::get(getLLVMContext(), 128);
index 62c10b5134e26af25eff52cd93f2157b18fe8ebe..746e717a3098e714893c65c5b79bf04b72ead51f 100644 (file)
@@ -466,7 +466,7 @@ _mm_loadr_pd(double const *dp)
 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
 _mm_loadu_pd(double const *dp)
 {
-  return (__m128d){ dp[0], dp[1] };
+  return __builtin_ia32_loadupd(dp);
 }
 
 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
index 00760ed6d1ef4cbb3556ac904bdb1e5d826317e9..42dd3e8d3b8787aabf9ad222d04564bf2f628702 100644 (file)
@@ -539,7 +539,7 @@ _mm_load_ps(const float *p)
 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
 _mm_loadu_ps(const float *p)
 {
-  return (__m128){ p[0], p[1], p[2], p[3] };
+  return __builtin_ia32_loadups(p);
 }
 
 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
index bb63048b61666c465e35fb04da8b82ea9df991c2..190fa55c5756e448a814c30a3d9cfe84a3ab29cd 100644 (file)
@@ -273,6 +273,7 @@ void f0() {
 #endif
   tmp_V2i = __builtin_ia32_cvttps2pi(tmp_V4f);
   (void) __builtin_ia32_maskmovq(tmp_V8c, tmp_V8c, tmp_cp);
+  tmp_V4f = __builtin_ia32_loadups(tmp_fCp);
   (void) __builtin_ia32_storeups(tmp_fp, tmp_V4f);
   (void) __builtin_ia32_storehps(tmp_V2ip, tmp_V4f);
   (void) __builtin_ia32_storelps(tmp_V2ip, tmp_V4f);
@@ -290,6 +291,7 @@ void f0() {
   tmp_V4f = __builtin_ia32_sqrtps(tmp_V4f);
   tmp_V4f = __builtin_ia32_sqrtss(tmp_V4f);
   (void) __builtin_ia32_maskmovdqu(tmp_V16c, tmp_V16c, tmp_cp);
+  tmp_V2d = __builtin_ia32_loadupd(tmp_dCp);
   (void) __builtin_ia32_storeupd(tmp_dp, tmp_V2d);
   tmp_i = __builtin_ia32_movmskpd(tmp_V2d);
   tmp_i = __builtin_ia32_pmovmskb128(tmp_V16c);