From 649d92ad2ffe6e6638047cc1a0cfbd5983de50f2 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 17 Jun 2016 14:27:38 +0000 Subject: [PATCH] [X86][SSE4A] Remove the GCCBuiltins from the movntsd/movntss intrinsic defs so we can emit native IR from clang. Clang-side sibling commit to follow. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@273002 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IR/IntrinsicsX86.td | 4 ++-- test/CodeGen/X86/sse4a-intrinsics-fast-isel.ll | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td index 3f53159f821..fc65efd4f07 100644 --- a/include/llvm/IR/IntrinsicsX86.td +++ b/include/llvm/IR/IntrinsicsX86.td @@ -942,9 +942,9 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_sse4a_insertq : GCCBuiltin<"__builtin_ia32_insertq">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; - def int_x86_sse4a_movnt_ss : GCCBuiltin<"__builtin_ia32_movntss">, + def int_x86_sse4a_movnt_ss : Intrinsic<[], [llvm_ptr_ty, llvm_v4f32_ty], []>; - def int_x86_sse4a_movnt_sd : GCCBuiltin<"__builtin_ia32_movntsd">, + def int_x86_sse4a_movnt_sd : Intrinsic<[], [llvm_ptr_ty, llvm_v2f64_ty], []>; } diff --git a/test/CodeGen/X86/sse4a-intrinsics-fast-isel.ll b/test/CodeGen/X86/sse4a-intrinsics-fast-isel.ll index efe133dd335..f45abf1d85d 100644 --- a/test/CodeGen/X86/sse4a-intrinsics-fast-isel.ll +++ b/test/CodeGen/X86/sse4a-intrinsics-fast-isel.ll @@ -78,11 +78,10 @@ define void @test_stream_sd(double* %p, <2 x double> %a) { ; X64: # BB#0: ; X64-NEXT: movntsd %xmm0, (%rdi) ; X64-NEXT: retq - %bc = bitcast double* %p to i8* - call void @llvm.x86.sse4a.movnt.sd(i8* %bc, <2 x double> %a) + %1 = extractelement <2 x double> %a, i64 0 + store double %1, double* %p, align 1, !nontemporal !1 ret void } -declare void @llvm.x86.sse4a.movnt.sd(i8*, <2 x double>) nounwind readnone define void @test_mm_stream_ss(float* %p, <4 x float> %a) { ; X32-LABEL: test_mm_stream_ss: @@ -95,8 +94,9 @@ define void @test_mm_stream_ss(float* %p, <4 x float> %a) { ; X64: # BB#0: ; X64-NEXT: movntss %xmm0, (%rdi) ; X64-NEXT: retq - %bc = bitcast float* %p to i8* - call void @llvm.x86.sse4a.movnt.ss(i8* %bc, <4 x float> %a) + %1 = extractelement <4 x float> %a, i64 0 + store float %1, float* %p, align 1, !nontemporal !1 ret void } -declare void @llvm.x86.sse4a.movnt.ss(i8*, <4 x float>) nounwind readnone + +!1 = !{i32 1} -- 2.50.1