From: JF Bastien Date: Fri, 12 Apr 2019 00:11:27 +0000 (+0000) Subject: Variable auto-init: also auto-init alloca X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=eae000b4253d99ee3ce97cdfa6aa31947994661f;p=clang Variable auto-init: also auto-init alloca Summary: alloca isn’t auto-init’d right now because it’s a different path in clang that all the other stuff we support (it’s a builtin, not an expression). Interestingly, alloca doesn’t have a type (as opposed to even VLA) so we can really only initialize it with memset. Subscribers: jkorous, dexonsmith, cfe-commits, rjmccall, glider, kees, kcc, pcc Tags: #clang Differential Revision: https://reviews.llvm.org/D60548 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@358243 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp index d6ae818d6a..6b7ed4e8bf 100644 --- a/lib/CodeGen/CGBuiltin.cpp +++ b/lib/CodeGen/CGBuiltin.cpp @@ -17,6 +17,7 @@ #include "CodeGenFunction.h" #include "CodeGenModule.h" #include "ConstantEmitter.h" +#include "PatternInit.h" #include "TargetInfo.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Decl.h" @@ -44,6 +45,25 @@ int64_t clamp(int64_t Value, int64_t Low, int64_t High) { return std::min(High, std::max(Low, Value)); } +static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size, unsigned AlignmentInBytes) { + ConstantInt *Byte; + switch (CGF.getLangOpts().getTrivialAutoVarInit()) { + case LangOptions::TrivialAutoVarInitKind::Uninitialized: + // Nothing to initialize. + return; + case LangOptions::TrivialAutoVarInitKind::Zero: + Byte = CGF.Builder.getInt8(0x00); + break; + case LangOptions::TrivialAutoVarInitKind::Pattern: { + llvm::Type *Int8 = llvm::IntegerType::getInt8Ty(CGF.CGM.getLLVMContext()); + Byte = llvm::dyn_cast( + initializationPatternFor(CGF.CGM, Int8)); + break; + } + } + CGF.Builder.CreateMemSet(AI, Byte, Size, AlignmentInBytes); +} + /// getBuiltinLibFunction - Given a builtin id for a function like /// "__builtin_fabsf", return a Function* for "fabsf". llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD, @@ -2259,6 +2279,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, .getQuantity(); AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size); AI->setAlignment(SuitableAlignmentInBytes); + initializeAlloca(*this, AI, Size, SuitableAlignmentInBytes); return RValue::get(AI); } @@ -2271,6 +2292,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getQuantity(); AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size); AI->setAlignment(AlignmentInBytes); + initializeAlloca(*this, AI, Size, AlignmentInBytes); return RValue::get(AI); } diff --git a/lib/CodeGen/CGDecl.cpp b/lib/CodeGen/CGDecl.cpp index 6f27d879a9..aea99dbbaf 100644 --- a/lib/CodeGen/CGDecl.cpp +++ b/lib/CodeGen/CGDecl.cpp @@ -19,6 +19,7 @@ #include "CodeGenFunction.h" #include "CodeGenModule.h" #include "ConstantEmitter.h" +#include "PatternInit.h" #include "TargetInfo.h" #include "clang/AST/ASTContext.h" #include "clang/AST/CharUnits.h" @@ -984,92 +985,13 @@ static bool shouldSplitConstantStore(CodeGenModule &CGM, return false; } -static llvm::Constant *patternFor(CodeGenModule &CGM, llvm::Type *Ty) { - // The following value is a guaranteed unmappable pointer value and has a - // repeated byte-pattern which makes it easier to synthesize. We use it for - // pointers as well as integers so that aggregates are likely to be - // initialized with this repeated value. - constexpr uint64_t LargeValue = 0xAAAAAAAAAAAAAAAAull; - // For 32-bit platforms it's a bit trickier because, across systems, only the - // zero page can reasonably be expected to be unmapped, and even then we need - // a very low address. We use a smaller value, and that value sadly doesn't - // have a repeated byte-pattern. We don't use it for integers. - constexpr uint32_t SmallValue = 0x000000AA; - // Floating-point values are initialized as NaNs because they propagate. Using - // a repeated byte pattern means that it will be easier to initialize - // all-floating-point aggregates and arrays with memset. Further, aggregates - // which mix integral and a few floats might also initialize with memset - // followed by a handful of stores for the floats. Using fairly unique NaNs - // also means they'll be easier to distinguish in a crash. - constexpr bool NegativeNaN = true; - constexpr uint64_t NaNPayload = 0xFFFFFFFFFFFFFFFFull; - if (Ty->isIntOrIntVectorTy()) { - unsigned BitWidth = cast( - Ty->isVectorTy() ? Ty->getVectorElementType() : Ty) - ->getBitWidth(); - if (BitWidth <= 64) - return llvm::ConstantInt::get(Ty, LargeValue); - return llvm::ConstantInt::get( - Ty, llvm::APInt::getSplat(BitWidth, llvm::APInt(64, LargeValue))); - } - if (Ty->isPtrOrPtrVectorTy()) { - auto *PtrTy = cast( - Ty->isVectorTy() ? Ty->getVectorElementType() : Ty); - unsigned PtrWidth = CGM.getContext().getTargetInfo().getPointerWidth( - PtrTy->getAddressSpace()); - llvm::Type *IntTy = llvm::IntegerType::get(CGM.getLLVMContext(), PtrWidth); - uint64_t IntValue; - switch (PtrWidth) { - default: - llvm_unreachable("pattern initialization of unsupported pointer width"); - case 64: - IntValue = LargeValue; - break; - case 32: - IntValue = SmallValue; - break; - } - auto *Int = llvm::ConstantInt::get(IntTy, IntValue); - return llvm::ConstantExpr::getIntToPtr(Int, PtrTy); - } - if (Ty->isFPOrFPVectorTy()) { - unsigned BitWidth = llvm::APFloat::semanticsSizeInBits( - (Ty->isVectorTy() ? Ty->getVectorElementType() : Ty) - ->getFltSemantics()); - llvm::APInt Payload(64, NaNPayload); - if (BitWidth >= 64) - Payload = llvm::APInt::getSplat(BitWidth, Payload); - return llvm::ConstantFP::getQNaN(Ty, NegativeNaN, &Payload); - } - if (Ty->isArrayTy()) { - // Note: this doesn't touch tail padding (at the end of an object, before - // the next array object). It is instead handled by replaceUndef. - auto *ArrTy = cast(Ty); - llvm::SmallVector Element( - ArrTy->getNumElements(), patternFor(CGM, ArrTy->getElementType())); - return llvm::ConstantArray::get(ArrTy, Element); - } - - // Note: this doesn't touch struct padding. It will initialize as much union - // padding as is required for the largest type in the union. Padding is - // instead handled by replaceUndef. Stores to structs with volatile members - // don't have a volatile qualifier when initialized according to C++. This is - // fine because stack-based volatiles don't really have volatile semantics - // anyways, and the initialization shouldn't be observable. - auto *StructTy = cast(Ty); - llvm::SmallVector Struct(StructTy->getNumElements()); - for (unsigned El = 0; El != Struct.size(); ++El) - Struct[El] = patternFor(CGM, StructTy->getElementType(El)); - return llvm::ConstantStruct::get(StructTy, Struct); -} - enum class IsPattern { No, Yes }; /// Generate a constant filled with either a pattern or zeroes. static llvm::Constant *patternOrZeroFor(CodeGenModule &CGM, IsPattern isPattern, llvm::Type *Ty) { if (isPattern == IsPattern::Yes) - return patternFor(CGM, Ty); + return initializationPatternFor(CGM, Ty); else return llvm::Constant::getNullValue(Ty); } @@ -1294,8 +1216,8 @@ static void emitStoresForPatternInit(CodeGenModule &CGM, const VarDecl &D, Address Loc, bool isVolatile, CGBuilderTy &Builder) { llvm::Type *ElTy = Loc.getElementType(); - llvm::Constant *constant = - constWithPadding(CGM, IsPattern::Yes, patternFor(CGM, ElTy)); + llvm::Constant *constant = constWithPadding( + CGM, IsPattern::Yes, initializationPatternFor(CGM, ElTy)); assert(!isa(constant)); emitStoresForConstant(CGM, D, Loc, isVolatile, Builder, constant); } @@ -1818,8 +1740,8 @@ void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) { case LangOptions::TrivialAutoVarInitKind::Pattern: { llvm::Type *ElTy = Loc.getElementType(); - llvm::Constant *Constant = - constWithPadding(CGM, IsPattern::Yes, patternFor(CGM, ElTy)); + llvm::Constant *Constant = constWithPadding( + CGM, IsPattern::Yes, initializationPatternFor(CGM, ElTy)); CharUnits ConstantAlign = getContext().getTypeAlignInChars(VlaSize.Type); llvm::BasicBlock *SetupBB = createBasicBlock("vla-setup.loop"); llvm::BasicBlock *LoopBB = createBasicBlock("vla-init.loop"); diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index 29c6793c60..416bc4dc31 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -88,6 +88,7 @@ add_clang_library(clangCodeGen MicrosoftCXXABI.cpp ModuleBuilder.cpp ObjectFilePCHContainerOperations.cpp + PatternInit.cpp SanitizerMetadata.cpp SwiftCallingConv.cpp TargetInfo.cpp diff --git a/lib/CodeGen/PatternInit.cpp b/lib/CodeGen/PatternInit.cpp new file mode 100644 index 0000000000..7a1baf96cf --- /dev/null +++ b/lib/CodeGen/PatternInit.cpp @@ -0,0 +1,93 @@ +//===--- PatternInit.cpp - Pattern Initialization -------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "PatternInit.h" +#include "CodeGenModule.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Type.h" + +llvm::Constant *clang::CodeGen::initializationPatternFor(CodeGenModule &CGM, + llvm::Type *Ty) { + // The following value is a guaranteed unmappable pointer value and has a + // repeated byte-pattern which makes it easier to synthesize. We use it for + // pointers as well as integers so that aggregates are likely to be + // initialized with this repeated value. + constexpr uint64_t LargeValue = 0xAAAAAAAAAAAAAAAAull; + // For 32-bit platforms it's a bit trickier because, across systems, only the + // zero page can reasonably be expected to be unmapped, and even then we need + // a very low address. We use a smaller value, and that value sadly doesn't + // have a repeated byte-pattern. We don't use it for integers. + constexpr uint32_t SmallValue = 0x000000AA; + // Floating-point values are initialized as NaNs because they propagate. Using + // a repeated byte pattern means that it will be easier to initialize + // all-floating-point aggregates and arrays with memset. Further, aggregates + // which mix integral and a few floats might also initialize with memset + // followed by a handful of stores for the floats. Using fairly unique NaNs + // also means they'll be easier to distinguish in a crash. + constexpr bool NegativeNaN = true; + constexpr uint64_t NaNPayload = 0xFFFFFFFFFFFFFFFFull; + if (Ty->isIntOrIntVectorTy()) { + unsigned BitWidth = cast( + Ty->isVectorTy() ? Ty->getVectorElementType() : Ty) + ->getBitWidth(); + if (BitWidth <= 64) + return llvm::ConstantInt::get(Ty, LargeValue); + return llvm::ConstantInt::get( + Ty, llvm::APInt::getSplat(BitWidth, llvm::APInt(64, LargeValue))); + } + if (Ty->isPtrOrPtrVectorTy()) { + auto *PtrTy = cast( + Ty->isVectorTy() ? Ty->getVectorElementType() : Ty); + unsigned PtrWidth = CGM.getContext().getTargetInfo().getPointerWidth( + PtrTy->getAddressSpace()); + llvm::Type *IntTy = llvm::IntegerType::get(CGM.getLLVMContext(), PtrWidth); + uint64_t IntValue; + switch (PtrWidth) { + default: + llvm_unreachable("pattern initialization of unsupported pointer width"); + case 64: + IntValue = LargeValue; + break; + case 32: + IntValue = SmallValue; + break; + } + auto *Int = llvm::ConstantInt::get(IntTy, IntValue); + return llvm::ConstantExpr::getIntToPtr(Int, PtrTy); + } + if (Ty->isFPOrFPVectorTy()) { + unsigned BitWidth = llvm::APFloat::semanticsSizeInBits( + (Ty->isVectorTy() ? Ty->getVectorElementType() : Ty) + ->getFltSemantics()); + llvm::APInt Payload(64, NaNPayload); + if (BitWidth >= 64) + Payload = llvm::APInt::getSplat(BitWidth, Payload); + return llvm::ConstantFP::getQNaN(Ty, NegativeNaN, &Payload); + } + if (Ty->isArrayTy()) { + // Note: this doesn't touch tail padding (at the end of an object, before + // the next array object). It is instead handled by replaceUndef. + auto *ArrTy = cast(Ty); + llvm::SmallVector Element( + ArrTy->getNumElements(), + initializationPatternFor(CGM, ArrTy->getElementType())); + return llvm::ConstantArray::get(ArrTy, Element); + } + + // Note: this doesn't touch struct padding. It will initialize as much union + // padding as is required for the largest type in the union. Padding is + // instead handled by replaceUndef. Stores to structs with volatile members + // don't have a volatile qualifier when initialized according to C++. This is + // fine because stack-based volatiles don't really have volatile semantics + // anyways, and the initialization shouldn't be observable. + auto *StructTy = cast(Ty); + llvm::SmallVector Struct(StructTy->getNumElements()); + for (unsigned El = 0; El != Struct.size(); ++El) + Struct[El] = initializationPatternFor(CGM, StructTy->getElementType(El)); + return llvm::ConstantStruct::get(StructTy, Struct); +} diff --git a/lib/CodeGen/PatternInit.h b/lib/CodeGen/PatternInit.h new file mode 100644 index 0000000000..f117dde9ac --- /dev/null +++ b/lib/CodeGen/PatternInit.h @@ -0,0 +1,27 @@ +//===- PatternInit - Pattern initialization ---------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_CODEGEN_PATTERNINIT_H +#define LLVM_CLANG_LIB_CODEGEN_PATTERNINIT_H + +namespace llvm { +class Constant; +class Type; +} // namespace llvm + +namespace clang { +namespace CodeGen { + +class CodeGenModule; + +llvm::Constant *initializationPatternFor(CodeGenModule &, llvm::Type *); + +} // end namespace CodeGen +} // end namespace clang + +#endif diff --git a/test/CodeGenCXX/trivial-auto-var-init.cpp b/test/CodeGenCXX/trivial-auto-var-init.cpp index 0a9ad86c7e..2cc63a47dd 100644 --- a/test/CodeGenCXX/trivial-auto-var-init.cpp +++ b/test/CodeGenCXX/trivial-auto-var-init.cpp @@ -173,6 +173,34 @@ void test_vla(int size) { used(ptr); } +// UNINIT-LABEL: test_alloca( +// ZERO-LABEL: test_alloca( +// ZERO: %[[SIZE:[a-z0-9]+]] = sext i32 %{{.*}} to i64 +// ZERO-NEXT: %[[ALLOCA:[a-z0-9]+]] = alloca i8, i64 %[[SIZE]], align [[ALIGN:[0-9]+]] +// ZERO-NEXT: call void @llvm.memset{{.*}}(i8* align [[ALIGN]] %[[ALLOCA]], i8 0, i64 %[[SIZE]], i1 false) +// PATTERN-LABEL: test_alloca( +// PATTERN: %[[SIZE:[a-z0-9]+]] = sext i32 %{{.*}} to i64 +// PATTERN-NEXT: %[[ALLOCA:[a-z0-9]+]] = alloca i8, i64 %[[SIZE]], align [[ALIGN:[0-9]+]] +// PATTERN-NEXT: call void @llvm.memset{{.*}}(i8* align [[ALIGN]] %[[ALLOCA]], i8 -86, i64 %[[SIZE]], i1 false) +void test_alloca(int size) { + void *ptr = __builtin_alloca(size); + used(ptr); +} + +// UNINIT-LABEL: test_alloca_with_align( +// ZERO-LABEL: test_alloca_with_align( +// ZERO: %[[SIZE:[a-z0-9]+]] = sext i32 %{{.*}} to i64 +// ZERO-NEXT: %[[ALLOCA:[a-z0-9]+]] = alloca i8, i64 %[[SIZE]], align 128 +// ZERO-NEXT: call void @llvm.memset{{.*}}(i8* align 128 %[[ALLOCA]], i8 0, i64 %[[SIZE]], i1 false) +// PATTERN-LABEL: test_alloca_with_align( +// PATTERN: %[[SIZE:[a-z0-9]+]] = sext i32 %{{.*}} to i64 +// PATTERN-NEXT: %[[ALLOCA:[a-z0-9]+]] = alloca i8, i64 %[[SIZE]], align 128 +// PATTERN-NEXT: call void @llvm.memset{{.*}}(i8* align 128 %[[ALLOCA]], i8 -86, i64 %[[SIZE]], i1 false) +void test_alloca_with_align(int size) { + void *ptr = __builtin_alloca_with_align(size, 1024); + used(ptr); +} + // UNINIT-LABEL: test_struct_vla( // ZERO-LABEL: test_struct_vla( // ZERO: %[[SIZE:[0-9]+]] = mul nuw i64 %{{.*}}, 16