From 783fea130264748edda5b25f17019543c458b20f Mon Sep 17 00:00:00 2001 From: Richard Smith Date: Thu, 5 Apr 2018 20:52:58 +0000 Subject: [PATCH] PR36992: do not store beyond the dsize of a class object unless we know the tail padding is not reused. We track on the AggValueSlot (and through a couple of other initialization actions) whether we're dealing with an object that might share its tail padding with some other object, so that we can avoid emitting stores into the tail padding if that's the case. We still widen stores into tail padding when we can do so. Differential Revision: https://reviews.llvm.org/D45306 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@329342 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/CGAtomic.cpp | 4 +- lib/CodeGen/CGBlocks.cpp | 3 +- lib/CodeGen/CGCall.cpp | 9 ++-- lib/CodeGen/CGClass.cpp | 66 ++++++++++++++---------- lib/CodeGen/CGDecl.cpp | 20 +++++--- lib/CodeGen/CGDeclCXX.cpp | 3 +- lib/CodeGen/CGExpr.cpp | 6 ++- lib/CodeGen/CGExprAgg.cpp | 86 ++++++++++++++++++++------------ lib/CodeGen/CGExprCXX.cpp | 25 +++++++--- lib/CodeGen/CGObjC.cpp | 11 ++-- lib/CodeGen/CGOpenMPRuntime.cpp | 2 +- lib/CodeGen/CGStmt.cpp | 13 ++--- lib/CodeGen/CGValue.h | 29 +++++++++-- lib/CodeGen/CodeGenFunction.h | 59 ++++++++++++++++------ lib/CodeGen/ItaniumCXXABI.cpp | 5 +- test/CodeGenCXX/tail-padding.cpp | 34 +++++++++++++ 16 files changed, 260 insertions(+), 115 deletions(-) create mode 100644 test/CodeGenCXX/tail-padding.cpp diff --git a/lib/CodeGen/CGAtomic.cpp b/lib/CodeGen/CGAtomic.cpp index 7e685d2097..efc708988a 100644 --- a/lib/CodeGen/CGAtomic.cpp +++ b/lib/CodeGen/CGAtomic.cpp @@ -1513,7 +1513,8 @@ void AtomicInfo::emitCopyIntoMemory(RValue rvalue) const { getAtomicType()); bool IsVolatile = rvalue.isVolatileQualified() || LVal.isVolatileQualified(); - CGF.EmitAggregateCopy(Dest, Src, getAtomicType(), IsVolatile); + CGF.EmitAggregateCopy(Dest, Src, getAtomicType(), + AggValueSlot::DoesNotOverlap, IsVolatile); return; } @@ -2008,6 +2009,7 @@ void CodeGenFunction::EmitAtomicInit(Expr *init, LValue dest) { AggValueSlot::IsNotDestructed, AggValueSlot::DoesNotNeedGCBarriers, AggValueSlot::IsNotAliased, + AggValueSlot::DoesNotOverlap, Zeroed ? AggValueSlot::IsZeroed : AggValueSlot::IsNotZeroed); diff --git a/lib/CodeGen/CGBlocks.cpp b/lib/CodeGen/CGBlocks.cpp index 1a6904976f..4e57e6cd95 100644 --- a/lib/CodeGen/CGBlocks.cpp +++ b/lib/CodeGen/CGBlocks.cpp @@ -929,7 +929,8 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) { AggValueSlot::forAddr(blockField, Qualifiers(), AggValueSlot::IsDestructed, AggValueSlot::DoesNotNeedGCBarriers, - AggValueSlot::IsNotAliased); + AggValueSlot::IsNotAliased, + AggValueSlot::DoesNotOverlap); EmitAggExpr(copyExpr, Slot); } else { EmitSynthesizedCXXCopyCtor(blockField, src, copyExpr); diff --git a/lib/CodeGen/CGCall.cpp b/lib/CodeGen/CGCall.cpp index b01926e1ca..6797a5453e 100644 --- a/lib/CodeGen/CGCall.cpp +++ b/lib/CodeGen/CGCall.cpp @@ -3018,7 +3018,8 @@ static AggValueSlot createPlaceholderSlot(CodeGenFunction &CGF, Ty.getQualifiers(), AggValueSlot::IsNotDestructed, AggValueSlot::DoesNotNeedGCBarriers, - AggValueSlot::IsNotAliased); + AggValueSlot::IsNotAliased, + AggValueSlot::DoesNotOverlap); } void CodeGenFunction::EmitDelegateCallArg(CallArgList &args, @@ -3486,7 +3487,8 @@ RValue CallArg::getRValue(CodeGenFunction &CGF) const { if (!HasLV) return RV; LValue Copy = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty), Ty); - CGF.EmitAggregateCopy(Copy, LV, Ty, LV.isVolatile()); + CGF.EmitAggregateCopy(Copy, LV, Ty, AggValueSlot::DoesNotOverlap, + LV.isVolatile()); IsUsed = true; return RValue::getAggregate(Copy.getAddress()); } @@ -3500,7 +3502,8 @@ void CallArg::copyInto(CodeGenFunction &CGF, Address Addr) const { else { auto Addr = HasLV ? LV.getAddress() : RV.getAggregateAddress(); LValue SrcLV = CGF.MakeAddrLValue(Addr, Ty); - CGF.EmitAggregateCopy(Dst, SrcLV, Ty, + // We assume that call args are never copied into subobjects. + CGF.EmitAggregateCopy(Dst, SrcLV, Ty, AggValueSlot::DoesNotOverlap, HasLV ? LV.isVolatileQualified() : RV.isVolatileQualified()); } diff --git a/lib/CodeGen/CGClass.cpp b/lib/CodeGen/CGClass.cpp index 4ba5c06099..fb09b4d17a 100644 --- a/lib/CodeGen/CGClass.cpp +++ b/lib/CodeGen/CGClass.cpp @@ -555,10 +555,12 @@ static void EmitBaseInitializer(CodeGenFunction &CGF, BaseClassDecl, isBaseVirtual); AggValueSlot AggSlot = - AggValueSlot::forAddr(V, Qualifiers(), - AggValueSlot::IsDestructed, - AggValueSlot::DoesNotNeedGCBarriers, - AggValueSlot::IsNotAliased); + AggValueSlot::forAddr( + V, Qualifiers(), + AggValueSlot::IsDestructed, + AggValueSlot::DoesNotNeedGCBarriers, + AggValueSlot::IsNotAliased, + CGF.overlapForBaseInit(ClassDecl, BaseClassDecl, isBaseVirtual)); CGF.EmitAggExpr(BaseInit->getInit(), AggSlot); @@ -647,7 +649,8 @@ static void EmitMemberInitializer(CodeGenFunction &CGF, LValue Src = CGF.EmitLValueForFieldInitialization(ThisRHSLV, Field); // Copy the aggregate. - CGF.EmitAggregateCopy(LHS, Src, FieldType, LHS.isVolatileQualified()); + CGF.EmitAggregateCopy(LHS, Src, FieldType, CGF.overlapForFieldInit(Field), + LHS.isVolatileQualified()); // Ensure that we destroy the objects if an exception is thrown later in // the constructor. QualType::DestructionKind dtorKind = FieldType.isDestructedType(); @@ -677,10 +680,12 @@ void CodeGenFunction::EmitInitializerForField(FieldDecl *Field, LValue LHS, break; case TEK_Aggregate: { AggValueSlot Slot = - AggValueSlot::forLValue(LHS, - AggValueSlot::IsDestructed, - AggValueSlot::DoesNotNeedGCBarriers, - AggValueSlot::IsNotAliased); + AggValueSlot::forLValue( + LHS, + AggValueSlot::IsDestructed, + AggValueSlot::DoesNotNeedGCBarriers, + AggValueSlot::IsNotAliased, + overlapForFieldInit(Field)); EmitAggExpr(Init, Slot); break; } @@ -911,15 +916,15 @@ namespace { } CharUnits getMemcpySize(uint64_t FirstByteOffset) const { + ASTContext &Ctx = CGF.getContext(); unsigned LastFieldSize = - LastField->isBitField() ? - LastField->getBitWidthValue(CGF.getContext()) : - CGF.getContext().getTypeSize(LastField->getType()); - uint64_t MemcpySizeBits = - LastFieldOffset + LastFieldSize - FirstByteOffset + - CGF.getContext().getCharWidth() - 1; - CharUnits MemcpySize = - CGF.getContext().toCharUnitsFromBits(MemcpySizeBits); + LastField->isBitField() + ? LastField->getBitWidthValue(Ctx) + : Ctx.toBits( + Ctx.getTypeInfoDataSizeInChars(LastField->getType()).first); + uint64_t MemcpySizeBits = LastFieldOffset + LastFieldSize - + FirstByteOffset + Ctx.getCharWidth() - 1; + CharUnits MemcpySize = Ctx.toCharUnitsFromBits(MemcpySizeBits); return MemcpySize; } @@ -1960,7 +1965,8 @@ void CodeGenFunction::EmitCXXAggrConstructorCall(const CXXConstructorDecl *ctor, } EmitCXXConstructorCall(ctor, Ctor_Complete, /*ForVirtualBase=*/false, - /*Delegating=*/false, curAddr, E); + /*Delegating=*/false, curAddr, E, + AggValueSlot::DoesNotOverlap); } // Go to the next element. @@ -1995,7 +2001,8 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D, CXXCtorType Type, bool ForVirtualBase, bool Delegating, Address This, - const CXXConstructExpr *E) { + const CXXConstructExpr *E, + AggValueSlot::Overlap_t Overlap) { CallArgList Args; // Push the this ptr. @@ -2011,7 +2018,7 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D, LValue Src = EmitLValue(Arg); QualType DestTy = getContext().getTypeDeclType(D->getParent()); LValue Dest = MakeAddrLValue(This, DestTy); - EmitAggregateCopyCtor(Dest, Src); + EmitAggregateCopyCtor(Dest, Src, Overlap); return; } @@ -2023,7 +2030,8 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D, EmitCallArgs(Args, FPT, E->arguments(), E->getConstructor(), /*ParamsToSkip*/ 0, Order); - EmitCXXConstructorCall(D, Type, ForVirtualBase, Delegating, This, Args); + EmitCXXConstructorCall(D, Type, ForVirtualBase, Delegating, This, Args, + Overlap); } static bool canEmitDelegateCallArgs(CodeGenFunction &CGF, @@ -2055,7 +2063,8 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D, bool ForVirtualBase, bool Delegating, Address This, - CallArgList &Args) { + CallArgList &Args, + AggValueSlot::Overlap_t Overlap) { const CXXRecordDecl *ClassDecl = D->getParent(); // C++11 [class.mfct.non-static]p2: @@ -2082,7 +2091,7 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D, LValue SrcLVal = MakeAddrLValue(Src, SrcTy); QualType DestTy = getContext().getTypeDeclType(ClassDecl); LValue DestLVal = MakeAddrLValue(This, DestTy); - EmitAggregateCopyCtor(DestLVal, SrcLVal); + EmitAggregateCopyCtor(DestLVal, SrcLVal, Overlap); return; } @@ -2171,7 +2180,7 @@ void CodeGenFunction::EmitInheritedCXXConstructorCall( } EmitCXXConstructorCall(D, Ctor_Base, ForVirtualBase, /*Delegating*/false, - This, Args); + This, Args, AggValueSlot::MayOverlap); } void CodeGenFunction::EmitInlinedInheritingCXXConstructorCall( @@ -2267,7 +2276,8 @@ CodeGenFunction::EmitSynthesizedCXXCopyCtorCall(const CXXConstructorDecl *D, EmitCallArgs(Args, FPT, drop_begin(E->arguments(), 1), E->getConstructor(), /*ParamsToSkip*/ 1); - EmitCXXConstructorCall(D, Ctor_Complete, false, false, This, Args); + EmitCXXConstructorCall(D, Ctor_Complete, false, false, This, Args, + AggValueSlot::MayOverlap); } void @@ -2302,7 +2312,8 @@ CodeGenFunction::EmitDelegateCXXConstructorCall(const CXXConstructorDecl *Ctor, } EmitCXXConstructorCall(Ctor, CtorType, /*ForVirtualBase=*/false, - /*Delegating=*/true, This, DelegateArgs); + /*Delegating=*/true, This, DelegateArgs, + AggValueSlot::MayOverlap); } namespace { @@ -2333,7 +2344,8 @@ CodeGenFunction::EmitDelegatingCXXConstructorCall(const CXXConstructorDecl *Ctor AggValueSlot::forAddr(ThisPtr, Qualifiers(), AggValueSlot::IsDestructed, AggValueSlot::DoesNotNeedGCBarriers, - AggValueSlot::IsNotAliased); + AggValueSlot::IsNotAliased, + AggValueSlot::MayOverlap); EmitAggExpr(Ctor->init_begin()[0]->getInit(), AggSlot); diff --git a/lib/CodeGen/CGDecl.cpp b/lib/CodeGen/CGDecl.cpp index 2a547b2766..c9b80e38d4 100644 --- a/lib/CodeGen/CGDecl.cpp +++ b/lib/CodeGen/CGDecl.cpp @@ -1416,17 +1416,17 @@ void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) { } } -/// Emit an expression as an initializer for a variable at the given -/// location. The expression is not necessarily the normal -/// initializer for the variable, and the address is not necessarily +/// Emit an expression as an initializer for an object (variable, field, etc.) +/// at the given location. The expression is not necessarily the normal +/// initializer for the object, and the address is not necessarily /// its normal location. /// /// \param init the initializing expression -/// \param var the variable to act as if we're initializing +/// \param D the object to act as if we're initializing /// \param loc the address to initialize; its type is a pointer -/// to the LLVM mapping of the variable's type +/// to the LLVM mapping of the object's type /// \param alignment the alignment of the address -/// \param capturedByInit true if the variable is a __block variable +/// \param capturedByInit true if \p D is a __block variable /// whose address is potentially changed by the initializer void CodeGenFunction::EmitExprAsInit(const Expr *init, const ValueDecl *D, LValue lvalue, bool capturedByInit) { @@ -1454,11 +1454,17 @@ void CodeGenFunction::EmitExprAsInit(const Expr *init, const ValueDecl *D, if (type->isAtomicType()) { EmitAtomicInit(const_cast(init), lvalue); } else { + AggValueSlot::Overlap_t Overlap = AggValueSlot::MayOverlap; + if (isa(D)) + Overlap = AggValueSlot::DoesNotOverlap; + else if (auto *FD = dyn_cast(D)) + Overlap = overlapForFieldInit(FD); // TODO: how can we delay here if D is captured by its initializer? EmitAggExpr(init, AggValueSlot::forLValue(lvalue, AggValueSlot::IsDestructed, AggValueSlot::DoesNotNeedGCBarriers, - AggValueSlot::IsNotAliased)); + AggValueSlot::IsNotAliased, + Overlap)); } return; } diff --git a/lib/CodeGen/CGDeclCXX.cpp b/lib/CodeGen/CGDeclCXX.cpp index fc92425f7e..75b54b33bc 100644 --- a/lib/CodeGen/CGDeclCXX.cpp +++ b/lib/CodeGen/CGDeclCXX.cpp @@ -53,7 +53,8 @@ static void EmitDeclInit(CodeGenFunction &CGF, const VarDecl &D, case TEK_Aggregate: CGF.EmitAggExpr(Init, AggValueSlot::forLValue(lv,AggValueSlot::IsDestructed, AggValueSlot::DoesNotNeedGCBarriers, - AggValueSlot::IsNotAliased)); + AggValueSlot::IsNotAliased, + AggValueSlot::DoesNotOverlap)); return; } llvm_unreachable("bad evaluation kind"); diff --git a/lib/CodeGen/CGExpr.cpp b/lib/CodeGen/CGExpr.cpp index 4c6f4df7ed..ba34208230 100644 --- a/lib/CodeGen/CGExpr.cpp +++ b/lib/CodeGen/CGExpr.cpp @@ -214,7 +214,8 @@ void CodeGenFunction::EmitAnyExprToMem(const Expr *E, EmitAggExpr(E, AggValueSlot::forAddr(Location, Quals, AggValueSlot::IsDestructed_t(IsInit), AggValueSlot::DoesNotNeedGCBarriers, - AggValueSlot::IsAliased_t(!IsInit))); + AggValueSlot::IsAliased_t(!IsInit), + AggValueSlot::MayOverlap)); return; } @@ -432,7 +433,8 @@ EmitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *M) { E->getType().getQualifiers(), AggValueSlot::IsDestructed, AggValueSlot::DoesNotNeedGCBarriers, - AggValueSlot::IsNotAliased)); + AggValueSlot::IsNotAliased, + AggValueSlot::DoesNotOverlap)); break; } } diff --git a/lib/CodeGen/CGExprAgg.cpp b/lib/CodeGen/CGExprAgg.cpp index 959086891c..e2871e3d0e 100644 --- a/lib/CodeGen/CGExprAgg.cpp +++ b/lib/CodeGen/CGExprAgg.cpp @@ -337,7 +337,8 @@ void AggExprEmitter::EmitFinalDestCopy(QualType type, const LValue &src, AggValueSlot srcAgg = AggValueSlot::forLValue(src, AggValueSlot::IsDestructed, - needsGC(type), AggValueSlot::IsAliased); + needsGC(type), AggValueSlot::IsAliased, + AggValueSlot::MayOverlap); EmitCopy(type, Dest, srcAgg); } @@ -348,7 +349,7 @@ void AggExprEmitter::EmitFinalDestCopy(QualType type, const LValue &src, void AggExprEmitter::EmitCopy(QualType type, const AggValueSlot &dest, const AggValueSlot &src) { if (dest.requiresGCollection()) { - CharUnits sz = CGF.getContext().getTypeSizeInChars(type); + CharUnits sz = dest.getPreferredSize(CGF.getContext(), type); llvm::Value *size = llvm::ConstantInt::get(CGF.SizeTy, sz.getQuantity()); CGF.CGM.getObjCRuntime().EmitGCMemmoveCollectable(CGF, dest.getAddress(), @@ -362,7 +363,7 @@ void AggExprEmitter::EmitCopy(QualType type, const AggValueSlot &dest, // the two sides. LValue DestLV = CGF.MakeAddrLValue(dest.getAddress(), type); LValue SrcLV = CGF.MakeAddrLValue(src.getAddress(), type); - CGF.EmitAggregateCopy(DestLV, SrcLV, type, + CGF.EmitAggregateCopy(DestLV, SrcLV, type, dest.mayOverlap(), dest.isVolatile() || src.isVolatile()); } @@ -759,6 +760,7 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) { valueDest.isExternallyDestructed(), valueDest.requiresGCollection(), valueDest.isPotentiallyAliased(), + AggValueSlot::DoesNotOverlap, AggValueSlot::IsZeroed); } @@ -986,7 +988,8 @@ void AggExprEmitter::VisitBinAssign(const BinaryOperator *E) { EmitCopy(E->getLHS()->getType(), AggValueSlot::forLValue(LHS, AggValueSlot::IsDestructed, needsGC(E->getLHS()->getType()), - AggValueSlot::IsAliased), + AggValueSlot::IsAliased, + AggValueSlot::MayOverlap), Dest); return; } @@ -1007,7 +1010,8 @@ void AggExprEmitter::VisitBinAssign(const BinaryOperator *E) { AggValueSlot LHSSlot = AggValueSlot::forLValue(LHS, AggValueSlot::IsDestructed, needsGC(E->getLHS()->getType()), - AggValueSlot::IsAliased); + AggValueSlot::IsAliased, + AggValueSlot::MayOverlap); // A non-volatile aggregate destination might have volatile member. if (!LHSSlot.isVolatile() && CGF.hasVolatileMember(E->getLHS()->getType())) @@ -1185,6 +1189,7 @@ AggExprEmitter::EmitInitializationToLValue(Expr *E, LValue LV) { AggValueSlot::IsDestructed, AggValueSlot::DoesNotNeedGCBarriers, AggValueSlot::IsNotAliased, + AggValueSlot::MayOverlap, Dest.isZeroed())); return; case TEK_Scalar: @@ -1283,11 +1288,12 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) { Address V = CGF.GetAddressOfDirectBaseInCompleteClass( Dest.getAddress(), CXXRD, BaseRD, /*isBaseVirtual*/ false); - AggValueSlot AggSlot = - AggValueSlot::forAddr(V, Qualifiers(), - AggValueSlot::IsDestructed, - AggValueSlot::DoesNotNeedGCBarriers, - AggValueSlot::IsNotAliased); + AggValueSlot AggSlot = AggValueSlot::forAddr( + V, Qualifiers(), + AggValueSlot::IsDestructed, + AggValueSlot::DoesNotNeedGCBarriers, + AggValueSlot::IsNotAliased, + CGF.overlapForBaseInit(CXXRD, BaseRD, Base.isVirtual())); CGF.EmitAggExpr(E->getInit(curInitIndex++), AggSlot); if (QualType::DestructionKind dtorKind = @@ -1468,7 +1474,9 @@ void AggExprEmitter::VisitArrayInitLoopExpr(const ArrayInitLoopExpr *E, // If the subexpression is an ArrayInitLoopExpr, share its cleanup. auto elementSlot = AggValueSlot::forLValue( elementLV, AggValueSlot::IsDestructed, - AggValueSlot::DoesNotNeedGCBarriers, AggValueSlot::IsNotAliased); + AggValueSlot::DoesNotNeedGCBarriers, + AggValueSlot::IsNotAliased, + AggValueSlot::DoesNotOverlap); AggExprEmitter(CGF, elementSlot, false) .VisitArrayInitLoopExpr(InnerLoop, outerBegin); } else @@ -1584,7 +1592,7 @@ static void CheckAggExprForMemSetUse(AggValueSlot &Slot, const Expr *E, } // If the type is 16-bytes or smaller, prefer individual stores over memset. - CharUnits Size = CGF.getContext().getTypeSizeInChars(E->getType()); + CharUnits Size = Slot.getPreferredSize(CGF.getContext(), E->getType()); if (Size <= CharUnits::fromQuantity(16)) return; @@ -1630,13 +1638,37 @@ LValue CodeGenFunction::EmitAggExprToLValue(const Expr *E) { LValue LV = MakeAddrLValue(Temp, E->getType()); EmitAggExpr(E, AggValueSlot::forLValue(LV, AggValueSlot::IsNotDestructed, AggValueSlot::DoesNotNeedGCBarriers, - AggValueSlot::IsNotAliased)); + AggValueSlot::IsNotAliased, + AggValueSlot::DoesNotOverlap)); return LV; } -void CodeGenFunction::EmitAggregateCopy(LValue Dest, LValue Src, - QualType Ty, bool isVolatile, - bool isAssignment) { +AggValueSlot::Overlap_t CodeGenFunction::overlapForBaseInit( + const CXXRecordDecl *RD, const CXXRecordDecl *BaseRD, bool IsVirtual) { + // Virtual bases are initialized first, in address order, so there's never + // any overlap during their initialization. + // + // FIXME: Under P0840, this is no longer true: the tail padding of a vbase + // of a field could be reused by a vbase of a containing class. + if (IsVirtual) + return AggValueSlot::DoesNotOverlap; + + // If the base class is laid out entirely within the nvsize of the derived + // class, its tail padding cannot yet be initialized, so we can issue + // stores at the full width of the base class. + const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD); + if (Layout.getBaseClassOffset(BaseRD) + + getContext().getASTRecordLayout(BaseRD).getSize() <= + Layout.getNonVirtualSize()) + return AggValueSlot::DoesNotOverlap; + + // The tail padding may contain values we need to preserve. + return AggValueSlot::MayOverlap; +} + +void CodeGenFunction::EmitAggregateCopy(LValue Dest, LValue Src, QualType Ty, + AggValueSlot::Overlap_t MayOverlap, + bool isVolatile) { assert(!Ty->isAnyComplexType() && "Shouldn't happen for complex"); Address DestPtr = Dest.getAddress(); @@ -1669,12 +1701,11 @@ void CodeGenFunction::EmitAggregateCopy(LValue Dest, LValue Src, // implementation handles this case safely. If there is a libc that does not // safely handle this, we can add a target hook. - // Get data size info for this aggregate. If this is an assignment, - // don't copy the tail padding, because we might be assigning into a - // base subobject where the tail padding is claimed. Otherwise, - // copying it is fine. + // Get data size info for this aggregate. Don't copy the tail padding if this + // might be a potentially-overlapping subobject, since the tail padding might + // be occupied by a different object. Otherwise, copying it is fine. std::pair TypeInfo; - if (isAssignment) + if (MayOverlap) TypeInfo = getContext().getTypeInfoDataSizeInChars(Ty); else TypeInfo = getContext().getTypeInfoInChars(Ty); @@ -1686,22 +1717,11 @@ void CodeGenFunction::EmitAggregateCopy(LValue Dest, LValue Src, getContext().getAsArrayType(Ty))) { QualType BaseEltTy; SizeVal = emitArrayLength(VAT, BaseEltTy, DestPtr); - TypeInfo = getContext().getTypeInfoDataSizeInChars(BaseEltTy); - std::pair LastElementTypeInfo; - if (!isAssignment) - LastElementTypeInfo = getContext().getTypeInfoInChars(BaseEltTy); + TypeInfo = getContext().getTypeInfoInChars(BaseEltTy); assert(!TypeInfo.first.isZero()); SizeVal = Builder.CreateNUWMul( SizeVal, llvm::ConstantInt::get(SizeTy, TypeInfo.first.getQuantity())); - if (!isAssignment) { - SizeVal = Builder.CreateNUWSub( - SizeVal, - llvm::ConstantInt::get(SizeTy, TypeInfo.first.getQuantity())); - SizeVal = Builder.CreateNUWAdd( - SizeVal, llvm::ConstantInt::get( - SizeTy, LastElementTypeInfo.first.getQuantity())); - } } } if (!SizeVal) { diff --git a/lib/CodeGen/CGExprCXX.cpp b/lib/CodeGen/CGExprCXX.cpp index 56c5e68117..914dd18f72 100644 --- a/lib/CodeGen/CGExprCXX.cpp +++ b/lib/CodeGen/CGExprCXX.cpp @@ -279,7 +279,10 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( const Expr *Arg = *CE->arg_begin(); LValue RHS = EmitLValue(Arg); LValue Dest = MakeAddrLValue(This.getAddress(), Arg->getType()); - EmitAggregateCopy(Dest, RHS, Arg->getType()); + // This is the MSVC p->Ctor::Ctor(...) extension. We assume that's + // constructing a new complete object of type Ctor. + EmitAggregateCopy(Dest, RHS, Arg->getType(), + AggValueSlot::DoesNotOverlap); return RValue::get(This.getPointer()); } llvm_unreachable("unknown trivial member function"); @@ -631,7 +634,7 @@ CodeGenFunction::EmitCXXConstructExpr(const CXXConstructExpr *E, // Call the constructor. EmitCXXConstructorCall(CD, Type, ForVirtualBase, Delegating, - Dest.getAddress(), E); + Dest.getAddress(), E, Dest.mayOverlap()); } } @@ -933,7 +936,8 @@ static llvm::Value *EmitCXXNewAllocSize(CodeGenFunction &CGF, } static void StoreAnyExprIntoOneUnit(CodeGenFunction &CGF, const Expr *Init, - QualType AllocType, Address NewPtr) { + QualType AllocType, Address NewPtr, + AggValueSlot::Overlap_t MayOverlap) { // FIXME: Refactor with EmitExprAsInit. switch (CGF.getEvaluationKind(AllocType)) { case TEK_Scalar: @@ -949,7 +953,8 @@ static void StoreAnyExprIntoOneUnit(CodeGenFunction &CGF, const Expr *Init, = AggValueSlot::forAddr(NewPtr, AllocType.getQualifiers(), AggValueSlot::IsDestructed, AggValueSlot::DoesNotNeedGCBarriers, - AggValueSlot::IsNotAliased); + AggValueSlot::IsNotAliased, + MayOverlap); CGF.EmitAggExpr(Init, Slot); return; } @@ -1018,7 +1023,8 @@ void CodeGenFunction::EmitNewArrayInitializer( AggValueSlot::forAddr(CurPtr, ElementType.getQualifiers(), AggValueSlot::IsDestructed, AggValueSlot::DoesNotNeedGCBarriers, - AggValueSlot::IsNotAliased); + AggValueSlot::IsNotAliased, + AggValueSlot::DoesNotOverlap); EmitAggExpr(ILE->getInit(0), Slot); // Move past these elements. @@ -1083,7 +1089,8 @@ void CodeGenFunction::EmitNewArrayInitializer( // an array, and we have an array filler, we can fold together the two // initialization loops. StoreAnyExprIntoOneUnit(*this, ILE->getInit(i), - ILE->getInit(i)->getType(), CurPtr); + ILE->getInit(i)->getType(), CurPtr, + AggValueSlot::DoesNotOverlap); CurPtr = Address(Builder.CreateInBoundsGEP(CurPtr.getPointer(), Builder.getSize(1), "array.exp.next"), @@ -1236,7 +1243,8 @@ void CodeGenFunction::EmitNewArrayInitializer( } // Emit the initializer into this element. - StoreAnyExprIntoOneUnit(*this, Init, Init->getType(), CurPtr); + StoreAnyExprIntoOneUnit(*this, Init, Init->getType(), CurPtr, + AggValueSlot::DoesNotOverlap); // Leave the Cleanup if we entered one. if (CleanupDominator) { @@ -1267,7 +1275,8 @@ static void EmitNewInitializer(CodeGenFunction &CGF, const CXXNewExpr *E, CGF.EmitNewArrayInitializer(E, ElementType, ElementTy, NewPtr, NumElements, AllocSizeWithoutCookie); else if (const Expr *Init = E->getInitializer()) - StoreAnyExprIntoOneUnit(CGF, Init, E->getAllocatedType(), NewPtr); + StoreAnyExprIntoOneUnit(CGF, Init, E->getAllocatedType(), NewPtr, + AggValueSlot::DoesNotOverlap); } /// Emit a call to an operator new or operator delete function, as implicitly diff --git a/lib/CodeGen/CGObjC.cpp b/lib/CodeGen/CGObjC.cpp index a8dd96ff1f..b156737c18 100644 --- a/lib/CodeGen/CGObjC.cpp +++ b/lib/CodeGen/CGObjC.cpp @@ -1013,8 +1013,9 @@ CodeGenFunction::generateObjCGetterBody(const ObjCImplementationDecl *classImpl, // that's not necessarily the same as "on the stack", so // we still potentially need objc_memmove_collectable. EmitAggregateCopy(/* Dest= */ MakeAddrLValue(ReturnValue, ivarType), - /* Src= */ LV, ivarType); - return; } + /* Src= */ LV, ivarType, overlapForReturnValue()); + return; + } case TEK_Scalar: { llvm::Value *value; if (propType->isReferenceType()) { @@ -1439,7 +1440,8 @@ void CodeGenFunction::GenerateObjCCtorDtorMethod(ObjCImplementationDecl *IMP, EmitAggExpr(IvarInit->getInit(), AggValueSlot::forLValue(LV, AggValueSlot::IsDestructed, AggValueSlot::DoesNotNeedGCBarriers, - AggValueSlot::IsNotAliased)); + AggValueSlot::IsNotAliased, + AggValueSlot::DoesNotOverlap)); } // constructor returns 'self'. CodeGenTypes &Types = CGM.getTypes(); @@ -3381,7 +3383,8 @@ CodeGenFunction::GenerateObjCAtomicGetterCopyHelperFunction( Qualifiers(), AggValueSlot::IsDestructed, AggValueSlot::DoesNotNeedGCBarriers, - AggValueSlot::IsNotAliased)); + AggValueSlot::IsNotAliased, + AggValueSlot::DoesNotOverlap)); FinishFunction(); HelperFn = llvm::ConstantExpr::getBitCast(Fn, VoidPtrTy); diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp index acb683e5b0..66149af43c 100644 --- a/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/lib/CodeGen/CGOpenMPRuntime.cpp @@ -4793,7 +4793,7 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, CGF.getNaturalTypeAlignment(SharedsTy)); LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy); LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy); - CGF.EmitAggregateCopy(Dest, Src, SharedsTy); + CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap); } // Emit initial values for private copies (if any). TaskResultTy Result; diff --git a/lib/CodeGen/CGStmt.cpp b/lib/CodeGen/CGStmt.cpp index b89d9f0929..ab1556e4f1 100644 --- a/lib/CodeGen/CGStmt.cpp +++ b/lib/CodeGen/CGStmt.cpp @@ -1007,7 +1007,7 @@ void CodeGenFunction::EmitReturnOfRValue(RValue RV, QualType Ty) { } else if (RV.isAggregate()) { LValue Dest = MakeAddrLValue(ReturnValue, Ty); LValue Src = MakeAddrLValue(RV.getAggregateAddress(), Ty); - EmitAggregateCopy(Dest, Src, Ty); + EmitAggregateCopy(Dest, Src, Ty, overlapForReturnValue()); } else { EmitStoreOfComplex(RV.getComplexVal(), MakeAddrLValue(ReturnValue, Ty), /*init*/ true); @@ -1085,11 +1085,12 @@ void CodeGenFunction::EmitReturnStmt(const ReturnStmt &S) { /*isInit*/ true); break; case TEK_Aggregate: - EmitAggExpr(RV, AggValueSlot::forAddr(ReturnValue, - Qualifiers(), - AggValueSlot::IsDestructed, - AggValueSlot::DoesNotNeedGCBarriers, - AggValueSlot::IsNotAliased)); + EmitAggExpr(RV, AggValueSlot::forAddr( + ReturnValue, Qualifiers(), + AggValueSlot::IsDestructed, + AggValueSlot::DoesNotNeedGCBarriers, + AggValueSlot::IsNotAliased, + overlapForReturnValue())); break; } } diff --git a/lib/CodeGen/CGValue.h b/lib/CodeGen/CGValue.h index a32be19496..ea997c18d6 100644 --- a/lib/CodeGen/CGValue.h +++ b/lib/CodeGen/CGValue.h @@ -472,17 +472,25 @@ class AggValueSlot { /// evaluating an expression which constructs such an object. bool AliasedFlag : 1; + /// This is set to true if the tail padding of this slot might overlap + /// another object that may have already been initialized (and whose + /// value must be preserved by this initialization). If so, we may only + /// store up to the dsize of the type. Otherwise we can widen stores to + /// the size of the type. + bool OverlapFlag : 1; + public: enum IsAliased_t { IsNotAliased, IsAliased }; enum IsDestructed_t { IsNotDestructed, IsDestructed }; enum IsZeroed_t { IsNotZeroed, IsZeroed }; + enum Overlap_t { DoesNotOverlap, MayOverlap }; enum NeedsGCBarriers_t { DoesNotNeedGCBarriers, NeedsGCBarriers }; /// ignored - Returns an aggregate value slot indicating that the /// aggregate value is being ignored. static AggValueSlot ignored() { return forAddr(Address::invalid(), Qualifiers(), IsNotDestructed, - DoesNotNeedGCBarriers, IsNotAliased); + DoesNotNeedGCBarriers, IsNotAliased, DoesNotOverlap); } /// forAddr - Make a slot for an aggregate value. @@ -500,6 +508,7 @@ public: IsDestructed_t isDestructed, NeedsGCBarriers_t needsGC, IsAliased_t isAliased, + Overlap_t mayOverlap, IsZeroed_t isZeroed = IsNotZeroed) { AggValueSlot AV; if (addr.isValid()) { @@ -514,6 +523,7 @@ public: AV.ObjCGCFlag = needsGC; AV.ZeroedFlag = isZeroed; AV.AliasedFlag = isAliased; + AV.OverlapFlag = mayOverlap; return AV; } @@ -521,9 +531,10 @@ public: IsDestructed_t isDestructed, NeedsGCBarriers_t needsGC, IsAliased_t isAliased, + Overlap_t mayOverlap, IsZeroed_t isZeroed = IsNotZeroed) { - return forAddr(LV.getAddress(), - LV.getQuals(), isDestructed, needsGC, isAliased, isZeroed); + return forAddr(LV.getAddress(), LV.getQuals(), isDestructed, needsGC, + isAliased, mayOverlap, isZeroed); } IsDestructed_t isExternallyDestructed() const { @@ -571,6 +582,10 @@ public: return IsAliased_t(AliasedFlag); } + Overlap_t mayOverlap() const { + return Overlap_t(OverlapFlag); + } + RValue asRValue() const { if (isIgnored()) { return RValue::getIgnored(); @@ -583,6 +598,14 @@ public: IsZeroed_t isZeroed() const { return IsZeroed_t(ZeroedFlag); } + + /// Get the preferred size to use when storing a value to this slot. This + /// is the type size unless that might overlap another object, in which + /// case it's the dsize. + CharUnits getPreferredSize(ASTContext &Ctx, QualType Type) const { + return mayOverlap() ? Ctx.getTypeInfoDataSizeInChars(Type).first + : Ctx.getTypeSizeInChars(Type); + } }; } // end namespace CodeGen diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h index 4db5accc44..90f0220561 100644 --- a/lib/CodeGen/CodeGenFunction.h +++ b/lib/CodeGen/CodeGenFunction.h @@ -2061,7 +2061,8 @@ public: T.getQualifiers(), AggValueSlot::IsNotDestructed, AggValueSlot::DoesNotNeedGCBarriers, - AggValueSlot::IsNotAliased); + AggValueSlot::IsNotAliased, + AggValueSlot::DoesNotOverlap); } /// Emit a cast to void* in the appropriate address space. @@ -2118,28 +2119,52 @@ public: } return false; } - /// EmitAggregateCopy - Emit an aggregate assignment. - /// - /// The difference to EmitAggregateCopy is that tail padding is not copied. - /// This is required for correctness when assigning non-POD structures in C++. + + /// Determine whether a return value slot may overlap some other object. + AggValueSlot::Overlap_t overlapForReturnValue() { + // FIXME: Assuming no overlap here breaks guaranteed copy elision for base + // class subobjects. These cases may need to be revisited depending on the + // resolution of the relevant core issue. + return AggValueSlot::DoesNotOverlap; + } + + /// Determine whether a field initialization may overlap some other object. + AggValueSlot::Overlap_t overlapForFieldInit(const FieldDecl *FD) { + // FIXME: These cases can result in overlap as a result of P0840R0's + // [[no_unique_address]] attribute. We can still infer NoOverlap in the + // presence of that attribute if the field is within the nvsize of its + // containing class, because non-virtual subobjects are initialized in + // address order. + return AggValueSlot::DoesNotOverlap; + } + + /// Determine whether a base class initialization may overlap some other + /// object. + AggValueSlot::Overlap_t overlapForBaseInit(const CXXRecordDecl *RD, + const CXXRecordDecl *BaseRD, + bool IsVirtual); + + /// Emit an aggregate assignment. void EmitAggregateAssign(LValue Dest, LValue Src, QualType EltTy) { bool IsVolatile = hasVolatileMember(EltTy); - EmitAggregateCopy(Dest, Src, EltTy, IsVolatile, /* isAssignment= */ true); + EmitAggregateCopy(Dest, Src, EltTy, AggValueSlot::MayOverlap, IsVolatile); } - void EmitAggregateCopyCtor(LValue Dest, LValue Src) { - EmitAggregateCopy(Dest, Src, Src.getType(), - /* IsVolatile= */ false, /* IsAssignment= */ false); + void EmitAggregateCopyCtor(LValue Dest, LValue Src, + AggValueSlot::Overlap_t MayOverlap) { + EmitAggregateCopy(Dest, Src, Src.getType(), MayOverlap); } /// EmitAggregateCopy - Emit an aggregate copy. /// - /// \param isVolatile - True iff either the source or the destination is - /// volatile. - /// \param isAssignment - If false, allow padding to be copied. This often - /// yields more efficient. + /// \param isVolatile \c true iff either the source or the destination is + /// volatile. + /// \param MayOverlap Whether the tail padding of the destination might be + /// occupied by some other object. More efficient code can often be + /// generated if not. void EmitAggregateCopy(LValue Dest, LValue Src, QualType EltTy, - bool isVolatile = false, bool isAssignment = false); + AggValueSlot::Overlap_t MayOverlap, + bool isVolatile = false); /// GetAddrOfLocalVar - Return the address of a local variable. Address GetAddrOfLocalVar(const VarDecl *VD) { @@ -2303,11 +2328,13 @@ public: void EmitCXXConstructorCall(const CXXConstructorDecl *D, CXXCtorType Type, bool ForVirtualBase, bool Delegating, - Address This, const CXXConstructExpr *E); + Address This, const CXXConstructExpr *E, + AggValueSlot::Overlap_t Overlap); void EmitCXXConstructorCall(const CXXConstructorDecl *D, CXXCtorType Type, bool ForVirtualBase, bool Delegating, - Address This, CallArgList &Args); + Address This, CallArgList &Args, + AggValueSlot::Overlap_t Overlap); /// Emit assumption load for all bases. Requires to be be called only on /// most-derived class and not under construction of the object. diff --git a/lib/CodeGen/ItaniumCXXABI.cpp b/lib/CodeGen/ItaniumCXXABI.cpp index 55e74cd425..32027297aa 100644 --- a/lib/CodeGen/ItaniumCXXABI.cpp +++ b/lib/CodeGen/ItaniumCXXABI.cpp @@ -3896,7 +3896,7 @@ static void InitCatchParam(CodeGenFunction &CGF, caughtExnAlignment); LValue Dest = CGF.MakeAddrLValue(ParamAddr, CatchType); LValue Src = CGF.MakeAddrLValue(adjustedExn, CatchType); - CGF.EmitAggregateCopy(Dest, Src, CatchType); + CGF.EmitAggregateCopy(Dest, Src, CatchType, AggValueSlot::DoesNotOverlap); return; } @@ -3923,7 +3923,8 @@ static void InitCatchParam(CodeGenFunction &CGF, AggValueSlot::forAddr(ParamAddr, Qualifiers(), AggValueSlot::IsNotDestructed, AggValueSlot::DoesNotNeedGCBarriers, - AggValueSlot::IsNotAliased)); + AggValueSlot::IsNotAliased, + AggValueSlot::DoesNotOverlap)); // Leave the terminate scope. CGF.EHStack.popTerminate(); diff --git a/test/CodeGenCXX/tail-padding.cpp b/test/CodeGenCXX/tail-padding.cpp new file mode 100644 index 0000000000..c3089bfe5e --- /dev/null +++ b/test/CodeGenCXX/tail-padding.cpp @@ -0,0 +1,34 @@ +// RUN: %clang_cc1 -triple %itanium_abi_triple -emit-llvm -o - %s | FileCheck %s + +// PR36992 +namespace Implicit { + struct A { char c; A(const A&); }; + struct B { int n; char c[3]; ~B(); }; + struct C : B, virtual A {}; + static_assert(sizeof(C) == sizeof(void*) + 8); + C f(C c) { return c; } + + // CHECK: define {{.*}} @_ZN8Implicit1CC1EOS0_ + // CHECK: call void @_ZN8Implicit1AC2ERKS0_( + // Note: this must memcpy 7 bytes, not 8, to avoid trampling over the virtual base class. + // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}, i8* {{.*}}, i64 7, i1 false) + // CHECK: store i32 {{.*}} @_ZTVN8Implicit1CE +} + +namespace InitWithinNVSize { + // This is the same as the previous test, except that the A base lies + // entirely within the nvsize of C. This makes it valid to copy at the + // full width. + struct A { char c; A(const A&); }; + struct B { int n; char c[3]; ~B(); }; + struct C : B, virtual A { char x; }; + static_assert(sizeof(C) > sizeof(void*) + 8); + C f(C c) { return c; } + + // CHECK: define {{.*}} @_ZN16InitWithinNVSize1CC1EOS0_ + // CHECK: call void @_ZN16InitWithinNVSize1AC2ERKS0_( + // This copies over the 'C::x' member, but that's OK because we've not initialized it yet. + // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}, i8* {{.*}}, i64 8, i1 false) + // CHECK: store i32 {{.*}} @_ZTVN16InitWithinNVSize1CE + // CHECK: store i8 +} -- 2.40.0