From: Matt Arsenault Date: Thu, 31 Jan 2019 23:41:23 +0000 (+0000) Subject: GlobalISel: Fix MMO creation with non-power-of-2 mem size X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=dbe3ece2e560bc0e712201485ab246f3bd35a384;p=llvm GlobalISel: Fix MMO creation with non-power-of-2 mem size It should probably just be mandatory for getTgtMemIntrinsic to return the alignment. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@352817 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/GlobalISel/IRTranslator.cpp b/lib/CodeGen/GlobalISel/IRTranslator.cpp index 49a2262ea09..09bbbe2d8ae 100644 --- a/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -1213,12 +1213,13 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) { TargetLowering::IntrinsicInfo Info; // TODO: Add a GlobalISel version of getTgtMemIntrinsic. if (TLI.getTgtMemIntrinsic(Info, CI, *MF, ID)) { - uint64_t Size = Info.memVT.getStoreSize(); - if (Info.align == 0) - Info.align = Size; + unsigned Align = Info.align; + if (Align == 0) + Align = DL->getABITypeAlignment(Info.memVT.getTypeForEVT(F->getContext())); + uint64_t Size = Info.memVT.getStoreSize(); MIB.addMemOperand(MF->getMachineMemOperand(MachinePointerInfo(Info.ptrVal), - Info.flags, Size, Info.align)); + Info.flags, Size, Align)); } return true; diff --git a/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll b/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll index 4ee1fd33381..dd89f9a4c11 100644 --- a/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll +++ b/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll @@ -2323,3 +2323,12 @@ define float @test_sqrt_f32(float %x) { %y = call float @llvm.sqrt.f32(float %x) ret float %y } + +; CHECK-LABEL: name: test_llvm.aarch64.neon.ld3.v4i32.p0i32 +; CHECK: %1:_(s384) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.ld3), %0(p0) :: (load 48 from %ir.ptr, align 64) +define void @test_llvm.aarch64.neon.ld3.v4i32.p0i32(i32* %ptr) { + %arst = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0i32(i32* %ptr) + ret void +} + +declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0i32(i32*) #3