From e8796a2846d76d0ab767238fbc4980af3c844ffd Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Thu, 25 Jul 2019 12:14:27 +0000
Subject: [PATCH] [InstCombine] try to narrow a truncated load

trunc (load X) --> load (bitcast X to narrow type)

We have this transform in DAGCombiner::ReduceLoadWidth(), but the
truncated load pattern can interfere with other instcombine transforms,
so I'd like to allow the fold sooner.

Example:
https://bugs.llvm.org/show_bug.cgi?id=16739
...in that report, we have bitcasts bracketing these ops, so those could
get eliminated too.

We've generally ruled out widening of loads early in IR (LoadCombine -
http://lists.llvm.org/pipermail/llvm-dev/2016-September/105291.html),
but that reasoning may not apply to narrowing if we can preserve
information such as the dereferenceable range.

Differential Revision: https://reviews.llvm.org/D64432

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@367011 91177308-0d34-0410-b5e6-96231b3b80d8
---
 .../InstCombine/InstCombineCasts.cpp      | 42 +++++++++++++++
 test/Transforms/InstCombine/trunc-load.ll | 62 +++++++++++++++----
 2 files changed, 92 insertions(+), 12 deletions(-)

diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 2c9ba203fbf..08a7a068af2 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -681,6 +681,45 @@ static Instruction *shrinkInsertElt(CastInst &Trunc,
   return nullptr;
 }
 
+static Instruction *narrowLoad(TruncInst &Trunc,
+                               InstCombiner::BuilderTy &Builder,
+                               const DataLayout &DL) {
+  // Check the layout to ensure we are not creating an unsupported operation.
+  // TODO: Create a GEP to offset the load?
+  if (!DL.isLittleEndian())
+    return nullptr;
+  unsigned NarrowBitWidth = Trunc.getDestTy()->getPrimitiveSizeInBits();
+  if (!DL.isLegalInteger(NarrowBitWidth))
+    return nullptr;
+
+  // Match a truncated load with no other uses.
+  Value *X;
+  if (!match(Trunc.getOperand(0), m_OneUse(m_Load(m_Value(X)))))
+    return nullptr;
+  LoadInst *WideLoad = cast<LoadInst>(Trunc.getOperand(0));
+  if (!WideLoad->isSimple())
+    return nullptr;
+
+  // Don't narrow this load if we would lose information about the
+  // dereferenceable range.
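+  // For example, an i64 load from a dereferenceable(8) pointer can shrink to
+  // i32 (the attribute still covers all 8 bytes), but with dereferenceable(7)
+  // we would drop the wide load's implied guarantee of 8 accessible bytes.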
+  bool CanBeNull;
+  uint64_t DerefBits = X->getPointerDereferenceableBytes(DL, CanBeNull) * 8;
+  if (DerefBits < WideLoad->getType()->getPrimitiveSizeInBits())
+    return nullptr;
+
+  // trunc (load X) --> load (bitcast X)
+  PointerType *PtrTy = PointerType::get(Trunc.getDestTy(),
+                                        WideLoad->getPointerAddressSpace());
+  Value *Bitcast = Builder.CreatePointerCast(X, PtrTy);
+  LoadInst *NarrowLoad = new LoadInst(Trunc.getDestTy(), Bitcast);
+  NarrowLoad->setAlignment(WideLoad->getAlignment());
+  copyMetadataForLoad(*NarrowLoad, *WideLoad);
+  return NarrowLoad;
+}
+
 Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
   if (Instruction *Result = commonCastTransforms(CI))
     return Result;
@@ -840,6 +879,9 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
   if (Instruction *I = foldVecTruncToExtElt(CI, *this))
     return I;
 
+  if (Instruction *NewLoad = narrowLoad(CI, Builder, DL))
+    return NewLoad;
+
   return nullptr;
 }
 
diff --git a/test/Transforms/InstCombine/trunc-load.ll b/test/Transforms/InstCombine/trunc-load.ll
index e17e99b1338..c85c035da86 100644
--- a/test/Transforms/InstCombine/trunc-load.ll
+++ b/test/Transforms/InstCombine/trunc-load.ll
@@ -29,10 +29,15 @@ define i32 @truncload_small_deref(i64* dereferenceable(7) %ptr) {
 ; On little-endian, we can narrow the load without an offset.
 
 define i32 @truncload_deref(i64* dereferenceable(8) %ptr) {
-; CHECK-LABEL: @truncload_deref(
-; CHECK-NEXT:    [[X:%.*]] = load i64, i64* [[PTR:%.*]], align 4
-; CHECK-NEXT:    [[R:%.*]] = trunc i64 [[X]] to i32
-; CHECK-NEXT:    ret i32 [[R]]
+; LE-LABEL: @truncload_deref(
+; LE-NEXT:    [[TMP1:%.*]] = bitcast i64* [[PTR:%.*]] to i32*
+; LE-NEXT:    [[R:%.*]] = load i32, i32* [[TMP1]], align 4
+; LE-NEXT:    ret i32 [[R]]
+;
+; BE-LABEL: @truncload_deref(
+; BE-NEXT:    [[X:%.*]] = load i64, i64* [[PTR:%.*]], align 4
+; BE-NEXT:    [[R:%.*]] = trunc i64 [[X]] to i32
+; BE-NEXT:    ret i32 [[R]]
 ;
   %x = load i64, i64* %ptr
   %r = trunc i64 %x to i32
@@ -42,10 +47,15 @@ define i32 @truncload_deref(i64* dereferenceable(8) %ptr) {
 ; Preserve alignment.
 
 define i16 @truncload_align(i32* dereferenceable(14) %ptr) {
-; CHECK-LABEL: @truncload_align(
-; CHECK-NEXT:    [[X:%.*]] = load i32, i32* [[PTR:%.*]], align 16
-; CHECK-NEXT:    [[R:%.*]] = trunc i32 [[X]] to i16
-; CHECK-NEXT:    ret i16 [[R]]
+; LE-LABEL: @truncload_align(
+; LE-NEXT:    [[TMP1:%.*]] = bitcast i32* [[PTR:%.*]] to i16*
+; LE-NEXT:    [[R:%.*]] = load i16, i16* [[TMP1]], align 16
+; LE-NEXT:    ret i16 [[R]]
+;
+; BE-LABEL: @truncload_align(
+; BE-NEXT:    [[X:%.*]] = load i32, i32* [[PTR:%.*]], align 16
+; BE-NEXT:    [[R:%.*]] = trunc i32 [[X]] to i16
+; BE-NEXT:    ret i16 [[R]]
 ;
   %x = load i32, i32* %ptr, align 16
   %r = trunc i32 %x to i16
@@ -98,12 +108,40 @@ define i32 @truncload_volatile(i64* dereferenceable(8) %ptr) {
 ; Preserve address space.
 
 define i32 @truncload_address_space(i64 addrspace(1)* dereferenceable(8) %ptr) {
-; CHECK-LABEL: @truncload_address_space(
-; CHECK-NEXT:    [[X:%.*]] = load i64, i64 addrspace(1)* [[PTR:%.*]], align 4
-; CHECK-NEXT:    [[R:%.*]] = trunc i64 [[X]] to i32
-; CHECK-NEXT:    ret i32 [[R]]
+; LE-LABEL: @truncload_address_space(
+; LE-NEXT:    [[TMP1:%.*]] = bitcast i64 addrspace(1)* [[PTR:%.*]] to i32 addrspace(1)*
+; LE-NEXT:    [[R:%.*]] = load i32, i32 addrspace(1)* [[TMP1]], align 4
+; LE-NEXT:    ret i32 [[R]]
+;
+; BE-LABEL: @truncload_address_space(
+; BE-NEXT:    [[X:%.*]] = load i64, i64 addrspace(1)* [[PTR:%.*]], align 4
+; BE-NEXT:    [[R:%.*]] = trunc i64 [[X]] to i32
+; BE-NEXT:    ret i32 [[R]]
 ;
   %x = load i64, i64 addrspace(1)* %ptr, align 4
   %r = trunc i64 %x to i32
   ret i32 %r
 }
+
+; Most metadata should be transferred to the narrow load.
+; TODO: We lost the range.
+
+define i32 @truncload_metadata(i64* dereferenceable(8) %ptr) {
+; LE-LABEL: @truncload_metadata(
+; LE-NEXT:    [[TMP1:%.*]] = bitcast i64* [[PTR:%.*]] to i32*
+; LE-NEXT:    [[R:%.*]] = load i32, i32* [[TMP1]], align 4, !invariant.load !0, !nontemporal !1
+; LE-NEXT:    ret i32 [[R]]
+;
+; BE-LABEL: @truncload_metadata(
+; BE-NEXT:    [[X:%.*]] = load i64, i64* [[PTR:%.*]], align 4, !range !0, !invariant.load !1, !nontemporal !2
+; BE-NEXT:    [[R:%.*]] = trunc i64 [[X]] to i32
+; BE-NEXT:    ret i32 [[R]]
+;
+  %x = load i64, i64* %ptr, align 4, !invariant.load !0, !nontemporal !1, !range !2
+  %r = trunc i64 %x to i32
+  ret i32 %r
+}
+
+!0 = !{}
+!1 = !{i32 1}
+!2 = !{i64 0, i64 2}
-- 
2.40.0
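
For illustration of the motivating pattern (a sketch, not from the commit
itself): the PR16739 report has bitcasts bracketing the truncated load, and
narrowing the load is what lets instcombine clean those up. A hypothetical
little-endian example, with an invented function name, assuming i32 is a
legal integer type:

  define float @bracketed_bitcast(i64* dereferenceable(8) %p) {
    %x = load i64, i64* %p
    %t = trunc i64 %x to i32
    %f = bitcast i32 %t to float
    ret float %f
  }

Once the trunc becomes an i32 load, instcombine can fold a load whose only
use is a bitcast into a load of the destination type, so the body can reduce
to:

  %c = bitcast i64* %p to float*
  %f = load float, float* %c
  ret float %f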