From: Artem Belevich Date: Tue, 7 Mar 2017 20:33:38 +0000 (+0000) Subject: [NVPTX] Fixed lowering of unaligned loads/stores of f16 scalars and vectors. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=85cbde2b329b50c249bf2e9c905aaca469e39c14;p=llvm [NVPTX] Fixed lowering of unaligned loads/stores of f16 scalars and vectors. Differential Revision: https://reviews.llvm.org/D30672 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@297198 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp index c2877c34f63..8b3e49abc82 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -2071,8 +2071,21 @@ SDValue NVPTXTargetLowering::LowerSelect(SDValue Op, SelectionDAG &DAG) const { SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { if (Op.getValueType() == MVT::i1) return LowerLOADi1(Op, DAG); - else - return SDValue(); + + // v2f16 is legal, so we can't rely on legalizer to handle unaligned + // loads and have to handle it here. 
+ if (Op.getValueType() == MVT::v2f16) { + LoadSDNode *Load = cast<LoadSDNode>(Op); + EVT MemVT = Load->getMemoryVT(); + if (!allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT, + Load->getAddressSpace(), Load->getAlignment())) { + SDValue Ops[2]; + std::tie(Ops[0], Ops[1]) = expandUnalignedLoad(Load, DAG); + return DAG.getMergeValues(Ops, SDLoc(Op)); + } + } + + return SDValue(); } // v = ld i1* addr @@ -2098,16 +2111,23 @@ SDValue NVPTXTargetLowering::LowerLOADi1(SDValue Op, SelectionDAG &DAG) const { } SDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { - EVT ValVT = Op.getOperand(1).getValueType(); - switch (ValVT.getSimpleVT().SimpleTy) { - case MVT::i1: + StoreSDNode *Store = cast<StoreSDNode>(Op); + EVT VT = Store->getMemoryVT(); + + if (VT == MVT::i1) return LowerSTOREi1(Op, DAG); - default: - if (ValVT.isVector()) - return LowerSTOREVector(Op, DAG); - else - return SDValue(); - } + + // v2f16 is legal, so we can't rely on legalizer to handle unaligned + // stores and have to handle it here. 
+ if (VT == MVT::v2f16 && + !allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT, + Store->getAddressSpace(), Store->getAlignment())) + return expandUnalignedStore(Store, DAG); + + if (VT.isVector()) + return LowerSTOREVector(Op, DAG); + + return SDValue(); } SDValue diff --git a/test/CodeGen/NVPTX/f16-instructions.ll b/test/CodeGen/NVPTX/f16-instructions.ll index 3d39104579d..403a67f02f8 100644 --- a/test/CodeGen/NVPTX/f16-instructions.ll +++ b/test/CodeGen/NVPTX/f16-instructions.ll @@ -161,6 +161,20 @@ define half @test_load(half* %a) #0 { ret half %r } +; CHECK-LABEL: .visible .func test_halfp0a1( +; CHECK-DAG: ld.param.u64 %[[FROM:rd?[0-9]+]], [test_halfp0a1_param_0]; +; CHECK-DAG: ld.param.u64 %[[TO:rd?[0-9]+]], [test_halfp0a1_param_1]; +; CHECK-DAG: ld.u8 [[B0:%r[sd]?[0-9]+]], [%[[FROM]]] +; CHECK-DAG: st.u8 [%[[TO]]], [[B0]] +; CHECK-DAG: ld.u8 [[B1:%r[sd]?[0-9]+]], [%[[FROM]]+1] +; CHECK-DAG: st.u8 [%[[TO]]+1], [[B1]] +; CHECK: ret +define void @test_halfp0a1(half * noalias readonly %from, half * %to) { + %1 = load half, half * %from , align 1 + store half %1, half * %to , align 1 + ret void +} + declare half @test_callee(half %a, half %b) #0 ; CHECK-LABEL: test_call( diff --git a/test/CodeGen/NVPTX/misaligned-vector-ldst.ll b/test/CodeGen/NVPTX/misaligned-vector-ldst.ll index 2ad72b01885..036d9638cea 100644 --- a/test/CodeGen/NVPTX/misaligned-vector-ldst.ll +++ b/test/CodeGen/NVPTX/misaligned-vector-ldst.ll @@ -41,6 +41,64 @@ define <4 x float> @t4(i8* %p1) { ret <4 x float> %r } +; CHECK-LABEL: .visible .func test_v1halfp0a1( +; CHECK-DAG: ld.param.u64 %[[FROM:rd?[0-9]+]], [test_v1halfp0a1_param_0]; +; CHECK-DAG: ld.param.u64 %[[TO:rd?[0-9]+]], [test_v1halfp0a1_param_1]; +; CHECK-DAG: ld.u8 [[B0:%r[sd]?[0-9]+]], [%[[FROM]]] +; CHECK-DAG: st.u8 [%[[TO]]], [[B0]] +; CHECK-DAG: ld.u8 [[B1:%r[sd]?[0-9]+]], [%[[FROM]]+1] +; CHECK-DAG: st.u8 [%[[TO]]+1], [[B1]] +; CHECK: ret +define void @test_v1halfp0a1(<1 x half> * noalias readonly %from, <1 x half> 
* %to) { + %1 = load <1 x half>, <1 x half> * %from , align 1 + store <1 x half> %1, <1 x half> * %to , align 1 + ret void +} + +; CHECK-LABEL: .visible .func test_v2halfp0a1( +; CHECK-DAG: ld.param.u64 %[[FROM:rd?[0-9]+]], [test_v2halfp0a1_param_0]; +; CHECK-DAG: ld.param.u64 %[[TO:rd?[0-9]+]], [test_v2halfp0a1_param_1]; +; CHECK-DAG: ld.u8 [[B0:%r[sd]?[0-9]+]], [%[[FROM]]] +; CHECK-DAG: st.u8 [%[[TO]]], +; CHECK-DAG: ld.u8 [[B1:%r[sd]?[0-9]+]], [%[[FROM]]+1] +; CHECK-DAG: st.u8 [%[[TO]]+1], +; CHECK-DAG: ld.u8 [[B2:%r[sd]?[0-9]+]], [%[[FROM]]+2] +; CHECK-DAG: st.u8 [%[[TO]]+2], +; CHECK-DAG: ld.u8 [[B3:%r[sd]?[0-9]+]], [%[[FROM]]+3] +; CHECK-DAG: st.u8 [%[[TO]]+3], +; CHECK: ret +define void @test_v2halfp0a1(<2 x half> * noalias readonly %from, <2 x half> * %to) { + %1 = load <2 x half>, <2 x half> * %from , align 1 + store <2 x half> %1, <2 x half> * %to , align 1 + ret void +} + +; CHECK-LABEL: .visible .func test_v4halfp0a1( +; CHECK-DAG: ld.param.u64 %[[FROM:rd?[0-9]+]], [test_v4halfp0a1_param_0]; +; CHECK-DAG: ld.param.u64 %[[TO:rd?[0-9]+]], [test_v4halfp0a1_param_1]; +; CHECK-DAG: ld.u8 [[B0:%r[sd]?[0-9]+]], [%[[FROM]]] +; CHECK-DAG: st.u8 [%[[TO]]], [[B0]] +; CHECK-DAG: ld.u8 [[B1:%r[sd]?[0-9]+]], [%[[FROM]]+1] +; CHECK-DAG: st.u8 [%[[TO]]+1], [[B1]] +; CHECK-DAG: ld.u8 [[B2:%r[sd]?[0-9]+]], [%[[FROM]]+2] +; CHECK-DAG: st.u8 [%[[TO]]+2], [[B2]] +; CHECK-DAG: ld.u8 [[B3:%r[sd]?[0-9]+]], [%[[FROM]]+3] +; CHECK-DAG: st.u8 [%[[TO]]+3], [[B3]] +; CHECK-DAG: ld.u8 [[B4:%r[sd]?[0-9]+]], [%[[FROM]]+4] +; CHECK-DAG: st.u8 [%[[TO]]+4], [[B4]] +; CHECK-DAG: ld.u8 [[B5:%r[sd]?[0-9]+]], [%[[FROM]]+5] +; CHECK-DAG: st.u8 [%[[TO]]+5], [[B5]] +; CHECK-DAG: ld.u8 [[B6:%r[sd]?[0-9]+]], [%[[FROM]]+6] +; CHECK-DAG: st.u8 [%[[TO]]+6], [[B6]] +; CHECK-DAG: ld.u8 [[B7:%r[sd]?[0-9]+]], [%[[FROM]]+7] +; CHECK-DAG: st.u8 [%[[TO]]+7], [[B7]] +; CHECK: ret +define void @test_v4halfp0a1(<4 x half> * noalias readonly %from, <4 x half> * %to) { + %1 = load <4 x half>, <4 x half> * %from 
, align 1 + store <4 x half> %1, <4 x half> * %to , align 1 + ret void +} + ; CHECK-LABEL: s1 define void @s1(<4 x float>* %p1, <4 x float> %v) {