From: Eli Friedman Date: Mon, 23 Sep 2013 23:38:39 +0000 (+0000) Subject: Add _mm_stream_si64 intrinsic. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=440a5f49133307745de7cc92a44d53088cf47c26;p=clang Add _mm_stream_si64 intrinsic. While I'm here, also fix the alignment computation for the whole family of intrinsics. PR17298. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@191243 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/clang/Basic/BuiltinsX86.def b/include/clang/Basic/BuiltinsX86.def index 3e39e83673..947d15609a 100644 --- a/include/clang/Basic/BuiltinsX86.def +++ b/include/clang/Basic/BuiltinsX86.def @@ -258,6 +258,7 @@ BUILTIN(__builtin_ia32_storeupd, "vd*V2d", "") BUILTIN(__builtin_ia32_movmskpd, "iV2d", "") BUILTIN(__builtin_ia32_pmovmskb128, "iV16c", "") BUILTIN(__builtin_ia32_movnti, "vi*i", "") +BUILTIN(__builtin_ia32_movnti64, "vLLi*LLi", "") BUILTIN(__builtin_ia32_movntpd, "vd*V2d", "") BUILTIN(__builtin_ia32_movntdq, "vV2LLi*V2LLi", "") BUILTIN(__builtin_ia32_psadbw128, "V2LLiV16cV16c", "") diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp index 4b4c8f481c..d1a41cfd01 100644 --- a/lib/CodeGen/CGBuiltin.cpp +++ b/lib/CodeGen/CGBuiltin.cpp @@ -3249,7 +3249,8 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_movntpd256: case X86::BI__builtin_ia32_movntdq: case X86::BI__builtin_ia32_movntdq256: - case X86::BI__builtin_ia32_movnti: { + case X86::BI__builtin_ia32_movnti: + case X86::BI__builtin_ia32_movnti64: { llvm::MDNode *Node = llvm::MDNode::get(getLLVMContext(), Builder.getInt32(1)); @@ -3259,7 +3260,16 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, "cast"); StoreInst *SI = Builder.CreateStore(Ops[1], BC); SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node); - SI->setAlignment(16); + + // If the operand is an integer, we can't assume alignment. Otherwise, + // assume natural alignment. 
+ QualType ArgTy = E->getArg(1)->getType(); + unsigned Align; + if (ArgTy->isIntegerType()) + Align = 1; + else + Align = getContext().getTypeSizeInChars(ArgTy).getQuantity(); + SI->setAlignment(Align); return SI; } // 3DNow! diff --git a/lib/Headers/emmintrin.h b/lib/Headers/emmintrin.h index f965dce2d5..505773e388 100644 --- a/lib/Headers/emmintrin.h +++ b/lib/Headers/emmintrin.h @@ -1214,6 +1214,14 @@ _mm_stream_si32(int *__p, int __a) __builtin_ia32_movnti(__p, __a); } +#ifdef __x86_64__ +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm_stream_si64(long long *__p, long long __a) +{ + __builtin_ia32_movnti64(__p, __a); +} +#endif + static __inline__ void __attribute__((__always_inline__, __nodebug__)) _mm_clflush(void const *__p) { diff --git a/test/CodeGen/builtins-x86.c b/test/CodeGen/builtins-x86.c index 9e78235d37..261bf2f5d8 100644 --- a/test/CodeGen/builtins-x86.c +++ b/test/CodeGen/builtins-x86.c @@ -55,6 +55,7 @@ void f0() { const float* tmp_fCp; double* tmp_dp; const double* tmp_dCp; + long long* tmp_LLip; #define imm_i 32 #define imm_i_0_2 0 @@ -288,6 +289,9 @@ void f0() { tmp_i = __builtin_ia32_movmskpd(tmp_V2d); tmp_i = __builtin_ia32_pmovmskb128(tmp_V16c); (void) __builtin_ia32_movnti(tmp_ip, tmp_i); +#ifdef USE_64 + (void) __builtin_ia32_movnti64(tmp_LLip, tmp_LLi); +#endif (void) __builtin_ia32_movntpd(tmp_dp, tmp_V2d); (void) __builtin_ia32_movntdq(tmp_V2LLip, tmp_V2LLi); tmp_V2LLi = __builtin_ia32_psadbw128(tmp_V16c, tmp_V16c); diff --git a/test/CodeGen/sse-builtins.c b/test/CodeGen/sse-builtins.c index 2a4ca17967..559d855547 100644 --- a/test/CodeGen/sse-builtins.c +++ b/test/CodeGen/sse-builtins.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -ffreestanding -triple i386-apple-darwin9 -target-cpu pentium4 -target-feature +sse4.1 -g -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -ffreestanding -triple x86_64-apple-macosx10.8.0 -target-feature +sse4.1 -g -emit-llvm %s -o - | FileCheck %s #include <xmmintrin.h> #include <emmintrin.h> @@ -188,3 +188,21 
@@ void test_storel_epi64(__m128i x, void* y) { // CHECK: store {{.*}} i64* {{.*}}, align 1{{$}} _mm_storel_epi64(y, x); } + +void test_stream_si32(int x, void *y) { + // CHECK-LABEL: define void @test_stream_si32 + // CHECK: store {{.*}} i32* {{.*}}, align 1, !nontemporal + _mm_stream_si32(y, x); +} + +void test_stream_si64(long long x, void *y) { + // CHECK-LABEL: define void @test_stream_si64 + // CHECK: store {{.*}} i64* {{.*}}, align 1, !nontemporal + _mm_stream_si64(y, x); +} + +void test_stream_si128(__m128i x, void *y) { + // CHECK-LABEL: define void @test_stream_si128 + // CHECK: store {{.*}} <2 x i64>* {{.*}}, align 16, !nontemporal + _mm_stream_si128(y, x); +}