granicus.if.org Git - clang/commitdiff
Get rid of storelv4si builtin as it can be expressed directly. This is general
author: Chad Rosier <mcrosier@apple.com>
Tue, 1 May 2012 18:11:51 +0000 (18:11 +0000)
committer: Chad Rosier <mcrosier@apple.com>
Tue, 1 May 2012 18:11:51 +0000 (18:11 +0000)
goodness because it provides opportunities to clean things up.  For example,

uint64_t t1(__m128i vA)
{
  uint64_t Alo;
  _mm_storel_epi64((__m128i*)&Alo, vA);
  return Alo;
}

was generating

movq %xmm0, -8(%rbp)
movq -8(%rbp), %rax

and now generates

movd %xmm0, %rax

rdar://11282581

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@155924 91177308-0d34-0410-b5e6-96231b3b80d8

include/clang/Basic/BuiltinsX86.def
lib/Headers/emmintrin.h
test/CodeGen/builtins-x86.c
test/CodeGen/sse-builtins.c

index 4aea980a9d5cd49442c637c45a918698c8da5493..e487e487ca5dbf69c572de999f0ac0961abbb3b1 100644 (file)
@@ -303,8 +303,6 @@ BUILTIN(__builtin_ia32_lddqu, "V16ccC*", "")
 BUILTIN(__builtin_ia32_palignr128, "V16cV16cV16cIc", "")
 BUILTIN(__builtin_ia32_insertps128, "V4fV4fV4fi", "")
 
-BUILTIN(__builtin_ia32_storelv4si, "vV2i*V2LLi", "")
-
 BUILTIN(__builtin_ia32_pblendvb128, "V16cV16cV16cV16c", "")
 BUILTIN(__builtin_ia32_pblendw128, "V8sV8sV8sIi", "")
 BUILTIN(__builtin_ia32_blendpd, "V2dV2dV2dIi", "")
index e10b77d1b2349e3fccc019be6471f51aadcfb39f..91395ed16f7f60ac7c800c2ed10c8318e76c0093 100644 (file)
@@ -1186,7 +1186,10 @@ _mm_maskmoveu_si128(__m128i d, __m128i n, char *p)
 static __inline__ void __attribute__((__always_inline__, __nodebug__))
 _mm_storel_epi64(__m128i *p, __m128i a)
 {
-  __builtin_ia32_storelv4si((__v2si *)p, a);
+  struct __mm_storel_epi64_struct {
+    long long u;
+  } __attribute__((__packed__, __may_alias__));
+  ((struct __mm_storel_epi64_struct*)p)->u = a[0];
 }
 
 static __inline__ void __attribute__((__always_inline__, __nodebug__))
index acb5554db4d39262e354090300475d1948480b4e..fcf1512ca16ec8adb1b245384f239107d72a6f54 100644 (file)
@@ -344,7 +344,6 @@ void f0() {
   tmp_V16c = __builtin_ia32_lddqu(tmp_cCp);
   tmp_V2LLi = __builtin_ia32_palignr128(tmp_V2LLi, tmp_V2LLi, imm_i);
   tmp_V1LLi = __builtin_ia32_palignr(tmp_V1LLi, tmp_V1LLi, imm_i);
-  (void) __builtin_ia32_storelv4si(tmp_V2ip, tmp_V2LLi);
 #ifdef USE_SSE4
   tmp_V16c = __builtin_ia32_pblendvb128(tmp_V16c, tmp_V16c, tmp_V16c);
   tmp_V8s = __builtin_ia32_pblendw128(tmp_V8s, tmp_V8s, imm_i_0_256);
index 2d5742515553a8d6d118fce36b8054c4f5a7a3f5..0e48560b0869c14c4e20fb7834f3ea19b80a778e 100644 (file)
@@ -151,3 +151,9 @@ __m128d test_mm_round_sd(__m128d x, __m128d y) {
   // CHECK: @llvm.x86.sse41.round.sd
   return _mm_round_sd(x, y, 2);
 }
+
+void test_storel_epi64(__m128i x, void* y) {
+  // CHECK: define void @test_storel_epi64
+  // CHECK: store {{.*}} i64* {{.*}}, align 1{{$}}
+  _mm_storel_epi64(y, x);
+}