goodness because it provides opportunites to cleanup things. For example,
uint64_t t1(__m128i vA)
{
uint64_t Alo;
_mm_storel_epi64((__m128i*)&Alo, vA);
return Alo;
}
was generating
movq %xmm0, -8(%rbp)
movq -8(%rbp), %rax
and now generates
movd %xmm0, %rax
rdar://
11282581
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@155924
91177308-0d34-0410-b5e6-
96231b3b80d8
BUILTIN(__builtin_ia32_palignr128, "V16cV16cV16cIc", "")
BUILTIN(__builtin_ia32_insertps128, "V4fV4fV4fi", "")
-BUILTIN(__builtin_ia32_storelv4si, "vV2i*V2LLi", "")
-
BUILTIN(__builtin_ia32_pblendvb128, "V16cV16cV16cV16c", "")
BUILTIN(__builtin_ia32_pblendw128, "V8sV8sV8sIi", "")
BUILTIN(__builtin_ia32_blendpd, "V2dV2dV2dIi", "")
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_storel_epi64(__m128i *p, __m128i a)
{
- __builtin_ia32_storelv4si((__v2si *)p, a);
+ struct __mm_storel_epi64_struct {
+ long long u;
+ } __attribute__((__packed__, __may_alias__));
+ ((struct __mm_storel_epi64_struct*)p)->u = a[0];
}
static __inline__ void __attribute__((__always_inline__, __nodebug__))
tmp_V16c = __builtin_ia32_lddqu(tmp_cCp);
tmp_V2LLi = __builtin_ia32_palignr128(tmp_V2LLi, tmp_V2LLi, imm_i);
tmp_V1LLi = __builtin_ia32_palignr(tmp_V1LLi, tmp_V1LLi, imm_i);
- (void) __builtin_ia32_storelv4si(tmp_V2ip, tmp_V2LLi);
#ifdef USE_SSE4
tmp_V16c = __builtin_ia32_pblendvb128(tmp_V16c, tmp_V16c, tmp_V16c);
tmp_V8s = __builtin_ia32_pblendw128(tmp_V8s, tmp_V8s, imm_i_0_256);
// CHECK: @llvm.x86.sse41.round.sd
return _mm_round_sd(x, y, 2);
}
+
+void test_storel_epi64(__m128i x, void* y) {
+ // CHECK: define void @test_storel_epi64
+ // CHECK: store {{.*}} i64* {{.*}}, align 1{{$}}
+ _mm_storel_epi64(y, x);
+}