From: Artem Belevich Date: Thu, 2 Mar 2017 19:14:10 +0000 (+0000) Subject: [NVPTX] Added missing LDU/LDG intrinsics for f16. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=78a75787a3bcdfce051294c62ebdfdbb20c485a1;p=llvm [NVPTX] Added missing LDU/LDG intrinsics for f16. Differential Revision: https://reviews.llvm.org/D30512 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@296784 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.td b/lib/Target/NVPTX/NVPTXInstrInfo.td index 13d86d31c04..4f2bd61a279 100644 --- a/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -2450,6 +2450,7 @@ let mayLoad=1, hasSideEffects=0 in { defm LDV_i32 : LD_VEC; defm LDV_i64 : LD_VEC; defm LDV_f16 : LD_VEC; + defm LDV_f16x2 : LD_VEC; defm LDV_f32 : LD_VEC; defm LDV_f64 : LD_VEC; } diff --git a/lib/Target/NVPTX/NVPTXIntrinsics.td b/lib/Target/NVPTX/NVPTXIntrinsics.td index 39779e8dfa9..8d228a9eeb7 100644 --- a/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -1503,6 +1503,8 @@ defm INT_PTX_LDU_GLOBAL_i8 : LDU_G<"u8 \t$result, [$src];", Int16Regs>; defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16 \t$result, [$src];", Int16Regs>; defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>; defm INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>; +defm INT_PTX_LDU_GLOBAL_f16 : LDU_G<"b16 \t$result, [$src];", Float16Regs>; +defm INT_PTX_LDU_GLOBAL_f16x2 : LDU_G<"b32 \t$result, [$src];", Float16x2Regs>; defm INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32 \t$result, [$src];", Float32Regs>; defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64 \t$result, [$src];", Float64Regs>; defm INT_PTX_LDU_GLOBAL_p32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>; @@ -1553,6 +1555,10 @@ defm INT_PTX_LDU_G_v2i16_ELE : VLDU_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>; defm INT_PTX_LDU_G_v2i32_ELE : VLDU_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>; +defm INT_PTX_LDU_G_v2f16_ELE + : VLDU_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>; +defm INT_PTX_LDU_G_v2f16x2_ELE + : VLDU_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>; defm INT_PTX_LDU_G_v2f32_ELE : VLDU_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>; defm INT_PTX_LDU_G_v2i64_ELE @@ -1567,6 +1573,12 @@ defm INT_PTX_LDU_G_v4i16_ELE defm INT_PTX_LDU_G_v4i32_ELE : VLDU_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int32Regs>; +defm INT_PTX_LDU_G_v4f16_ELE + : VLDU_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", + Float16Regs>; +defm INT_PTX_LDU_G_v4f16x2_ELE + : VLDU_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", + Float16x2Regs>; defm INT_PTX_LDU_G_v4f32_ELE : VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>; @@ -1665,7 +1677,9 @@ defm INT_PTX_LDG_G_v2i16_ELE : VLDG_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>; defm INT_PTX_LDG_G_v2i32_ELE : VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>; -defm INT_PTX_LDG_G_v4f16_ELE +defm INT_PTX_LDG_G_v2f16_ELE + : VLDG_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>; +defm INT_PTX_LDG_G_v2f16x2_ELE : VLDG_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>; defm INT_PTX_LDG_G_v2f32_ELE : VLDG_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>; @@ -1679,7 +1693,9 @@ defm INT_PTX_LDG_G_v4i16_ELE : VLDG_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>; defm INT_PTX_LDG_G_v4i32_ELE : VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int32Regs>; -defm INT_PTX_LDG_G_v8f16_ELE +defm INT_PTX_LDG_G_v4f16_ELE + : VLDG_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float16Regs>; +defm INT_PTX_LDG_G_v4f16x2_ELE : VLDG_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float16x2Regs>; defm INT_PTX_LDG_G_v4f32_ELE : VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>;