From 739f0de995be7840616933de76774ee156f704a0 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Sat, 24 Jun 2017 01:35:19 +0000 Subject: [PATCH] [InstCombine] Don't replace allocas with smaller globals Summary: InstCombine replaces large allocas with small globals consts causing buffer overflows on valid code, see PR33372. This fix permits this optimization only if the global is dereference for alloca size. Fixes PR33372 Reviewers: eugenis, majnemer, chandlerc Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D34311 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@306194 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../InstCombineLoadStoreAlloca.cpp | 15 ++++++++- .../InstCombine/memcpy-from-global.ll | 31 +++++++++++++++++++ 2 files changed, 45 insertions(+), 1 deletion(-) diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index a4d84ae81aa..ee30aa44674 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -169,6 +169,18 @@ isOnlyCopiedFromConstantGlobal(AllocaInst *AI, return nullptr; } +/// Returns true if V is dereferenceable for size of alloca. +static bool isDereferenceableForAllocaSize(const Value *V, const AllocaInst *AI, + const DataLayout &DL) { + if (AI->isArrayAllocation()) + return false; + uint64_t AllocaSize = DL.getTypeStoreSize(AI->getAllocatedType()); + if (!AllocaSize) + return false; + return isDereferenceableAndAlignedPointer(V, AI->getAlignment(), + APInt(64, AllocaSize), DL); +} + static Instruction *simplifyAllocaArraySize(InstCombiner &IC, AllocaInst &AI) { // Check for array size of 1 (scalar allocation). if (!AI.isArrayAllocation()) { @@ -390,7 +402,8 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) { unsigned SourceAlign = getOrEnforceKnownAlignment( Copy->getSource(), AI.getAlignment(), DL, &AI, &AC, &DT); - if (AI.getAlignment() <= SourceAlign) { + if (AI.getAlignment() <= SourceAlign && + isDereferenceableForAllocaSize(Copy->getSource(), &AI, DL)) { DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n'); DEBUG(dbgs() << " memcpy = " << *Copy << '\n'); for (unsigned i = 0, e = ToDelete.size(); i != e; ++i) diff --git a/test/Transforms/InstCombine/memcpy-from-global.ll b/test/Transforms/InstCombine/memcpy-from-global.ll index 7c9384d89ba..c8b329e94b0 100644 --- a/test/Transforms/InstCombine/memcpy-from-global.ll +++ b/test/Transforms/InstCombine/memcpy-from-global.ll @@ -204,3 +204,34 @@ define void @test9_addrspacecast() { ; CHECK-NEXT: call void @bar(i8* bitcast (%U* getelementptr inbounds ([2 x %U], [2 x %U]* @H, i64 0, i64 1) to i8*)) ret void } + +@bbb = local_unnamed_addr global [1000000 x i8] zeroinitializer, align 16 +@_ZL3KKK = internal unnamed_addr constant [3 x i8] c"\01\01\02", align 1 + +; Should not replace alloca with global because of size mismatch. +define void @test9_small_global() { +; CHECK-LABEL: @test9_small_global( +; CHECK-NOT: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}@bbb,{{.*}}@_ZL3KKK, +; CHECK: alloca [1000000 x i8] +entry: + %cc = alloca [1000000 x i8], align 16 + %cc.0..sroa_idx = getelementptr inbounds [1000000 x i8], [1000000 x i8]* %cc, i64 0, i64 0 + %arraydecay = getelementptr inbounds [1000000 x i8], [1000000 x i8]* %cc, i32 0, i32 0 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %arraydecay, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @_ZL3KKK, i32 0, i32 0), i64 3, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([1000000 x i8], [1000000 x i8]* @bbb, i32 0, i32 0), i8* %arraydecay, i64 1000000, i32 16, i1 false) + ret void +} + +; Should replace alloca with global as they have exactly the same size. +define void @test10_same_global() { +; CHECK-LABEL: @test10_same_global( +; CHECK-NOT: alloca +; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}@bbb,{{.*}}@_ZL3KKK,{{.*}}, i64 3, +entry: + %cc = alloca [3 x i8], align 1 + %cc.0..sroa_idx = getelementptr inbounds [3 x i8], [3 x i8]* %cc, i64 0, i64 0 + %arraydecay = getelementptr inbounds [3 x i8], [3 x i8]* %cc, i32 0, i32 0 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %arraydecay, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @_ZL3KKK, i32 0, i32 0), i64 3, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([1000000 x i8], [1000000 x i8]* @bbb, i32 0, i32 0), i8* %arraydecay, i64 3, i32 1, i1 false) + ret void +} -- 2.50.1