BUILTIN(__builtin_ia32_movmskpd, "iV2d", "")
BUILTIN(__builtin_ia32_pmovmskb128, "iV16c", "")
BUILTIN(__builtin_ia32_movnti, "vi*i", "")
+BUILTIN(__builtin_ia32_movnti64, "vLLi*LLi", "") // 64-bit sibling of movnti; _mm_stream_si64 calls it as (long long *, long long). NOTE(review): type string presumably decodes to void(long long *, long long) - confirm against the builtin type-encoding key.
BUILTIN(__builtin_ia32_movntpd, "vd*V2d", "")
BUILTIN(__builtin_ia32_movntdq, "vV2LLi*V2LLi", "")
BUILTIN(__builtin_ia32_psadbw128, "V2LLiV16cV16c", "")
case X86::BI__builtin_ia32_movntpd256:
case X86::BI__builtin_ia32_movntdq:
case X86::BI__builtin_ia32_movntdq256:
- case X86::BI__builtin_ia32_movnti: {
+ case X86::BI__builtin_ia32_movnti:
+ case X86::BI__builtin_ia32_movnti64: {
llvm::MDNode *Node = llvm::MDNode::get(getLLVMContext(),
Builder.getInt32(1));
"cast");
StoreInst *SI = Builder.CreateStore(Ops[1], BC);
SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
- SI->setAlignment(16);
+
+ // If the operand is an integer, we can't assume alignment. Otherwise,
+ // assume natural alignment.
+ QualType ArgTy = E->getArg(1)->getType();
+ unsigned Align;
+ if (ArgTy->isIntegerType())
+ Align = 1;
+ else
+ Align = getContext().getTypeSizeInChars(ArgTy).getQuantity();
+ SI->setAlignment(Align);
return SI;
}
// 3DNow!
__builtin_ia32_movnti(__p, __a);
}
+#ifdef __x86_64__ /* _mm_stream_si64 is only declared when __x86_64__ is defined. */
+static __inline__ void __attribute__((__always_inline__, __nodebug__))
+_mm_stream_si64(long long *__p, long long __a) /* 64-bit counterpart of the 32-bit stream wrapper; presumably __p is the destination and __a the value written - confirm against the builtin's codegen. */
+{
+ __builtin_ia32_movnti64(__p, __a); /* Thin wrapper: both arguments are forwarded unchanged to the builtin. */
+}
+#endif /* __x86_64__ */
+
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_clflush(void const *__p)
{
const float* tmp_fCp;
double* tmp_dp;
const double* tmp_dCp;
+ long long* tmp_LLip;
#define imm_i 32
#define imm_i_0_2 0
tmp_i = __builtin_ia32_movmskpd(tmp_V2d);
tmp_i = __builtin_ia32_pmovmskb128(tmp_V16c);
(void) __builtin_ia32_movnti(tmp_ip, tmp_i);
+#ifdef USE_64
+ (void) __builtin_ia32_movnti64(tmp_LLip, tmp_LLi);
+#endif
(void) __builtin_ia32_movntpd(tmp_dp, tmp_V2d);
(void) __builtin_ia32_movntdq(tmp_V2LLip, tmp_V2LLi);
tmp_V2LLi = __builtin_ia32_psadbw128(tmp_V16c, tmp_V16c);
-// RUN: %clang_cc1 -ffreestanding -triple i386-apple-darwin9 -target-cpu pentium4 -target-feature +sse4.1 -g -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -ffreestanding -triple x86_64-apple-macosx10.8.0 -target-feature +sse4.1 -g -emit-llvm %s -o - | FileCheck %s
#include <xmmintrin.h>
#include <emmintrin.h>
// CHECK: store {{.*}} i64* {{.*}}, align 1{{$}}
_mm_storel_epi64(y, x);
}
+
+void test_stream_si32(int x, void *y) { // Codegen test for _mm_stream_si32 with an int value and an untyped destination pointer.
+ // CHECK-LABEL: define void @test_stream_si32
+ // CHECK: store {{.*}} i32* {{.*}}, align 1, !nontemporal
+ _mm_stream_si32(y, x); // The pattern above expects an i32 store tagged !nontemporal with alignment 1 (no alignment assumed for an integer operand).
+}
+
+void test_stream_si64(long long x, void *y) { // Codegen test for _mm_stream_si64 with a long long value and an untyped destination pointer.
+ // CHECK-LABEL: define void @test_stream_si64
+ // CHECK: store {{.*}} i64* {{.*}}, align 1, !nontemporal
+ _mm_stream_si64(y, x); // The pattern above expects an i64 store tagged !nontemporal with alignment 1 (no alignment assumed for an integer operand).
+}
+
+void test_stream_si128(__m128i x, void *y) { // Codegen test for _mm_stream_si128 with a __m128i value and an untyped destination pointer.
+ // CHECK-LABEL: define void @test_stream_si128
+ // CHECK: store {{.*}} <2 x i64>* {{.*}}, align 16, !nontemporal
+ _mm_stream_si128(y, x); // The pattern above expects a <2 x i64> store tagged !nontemporal with alignment 16, unlike the integer variants which use alignment 1.
+}