From 57cd1b89cd91433ce1991a5bff36fe776a263796 Mon Sep 17 00:00:00 2001 From: John McCall Date: Wed, 28 Mar 2012 23:30:44 +0000 Subject: [PATCH] When we can't prove that the target of an aggregate copy is a complete object, the memcpy needs to use the data size of the structure instead of its sizeof() value. Fixes PR12204. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@153613 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/CGBlocks.cpp | 3 +- lib/CodeGen/CGCall.cpp | 3 +- lib/CodeGen/CGClass.cpp | 14 +- lib/CodeGen/CGDecl.cpp | 5 +- lib/CodeGen/CGDeclCXX.cpp | 8 +- lib/CodeGen/CGException.cpp | 9 +- lib/CodeGen/CGExpr.cpp | 19 ++- lib/CodeGen/CGExprAgg.cpp | 203 ++++++++++++++++------------ lib/CodeGen/CGExprCXX.cpp | 3 +- lib/CodeGen/CGObjC.cpp | 9 +- lib/CodeGen/CGStmt.cpp | 6 +- lib/CodeGen/CGValue.h | 35 ++++- lib/CodeGen/CodeGenFunction.h | 14 +- test/CodeGenCXX/assign-operator.cpp | 21 +++ 14 files changed, 226 insertions(+), 126 deletions(-) diff --git a/lib/CodeGen/CGBlocks.cpp b/lib/CodeGen/CGBlocks.cpp index 9fe31cb958..5e36d9e8fb 100644 --- a/lib/CodeGen/CGBlocks.cpp +++ b/lib/CodeGen/CGBlocks.cpp @@ -734,7 +734,8 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) { AggValueSlot::forAddr(blockField, Align, Qualifiers(), AggValueSlot::IsDestructed, AggValueSlot::DoesNotNeedGCBarriers, - AggValueSlot::IsNotAliased); + AggValueSlot::IsNotAliased, + AggValueSlot::IsCompleteObject); EmitAggExpr(copyExpr, Slot); } else { EmitSynthesizedCXXCopyCtor(blockField, src, copyExpr); diff --git a/lib/CodeGen/CGCall.cpp b/lib/CodeGen/CGCall.cpp index 4455f1a086..f5d7944b37 100644 --- a/lib/CodeGen/CGCall.cpp +++ b/lib/CodeGen/CGCall.cpp @@ -1875,7 +1875,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, if (Align > AI->getAlignment()) AI->setAlignment(Align); Args.push_back(AI); - EmitAggregateCopy(AI, Addr, I->Ty, RV.isVolatileQualified()); + EmitAggregateCopy(AI, Addr, I->Ty, RV.isVolatileQualified(), + /*destIsCompleteObject*/ true); // Validate argument match. checkArgMatches(AI, IRArgNo, IRFuncTy); diff --git a/lib/CodeGen/CGClass.cpp b/lib/CodeGen/CGClass.cpp index b452c1b7ab..254ef8001d 100644 --- a/lib/CodeGen/CGClass.cpp +++ b/lib/CodeGen/CGClass.cpp @@ -401,7 +401,8 @@ static void EmitBaseInitializer(CodeGenFunction &CGF, AggValueSlot::forAddr(V, Alignment, Qualifiers(), AggValueSlot::IsDestructed, AggValueSlot::DoesNotNeedGCBarriers, - AggValueSlot::IsNotAliased); + AggValueSlot::IsNotAliased, + AggValueSlot::IsNotCompleteObject); CGF.EmitAggExpr(BaseInit->getInit(), AggSlot); @@ -449,7 +450,8 @@ static void EmitAggMemberInitializer(CodeGenFunction &CGF, AggValueSlot::forLValue(LV, AggValueSlot::IsDestructed, AggValueSlot::DoesNotNeedGCBarriers, - AggValueSlot::IsNotAliased); + AggValueSlot::IsNotAliased, + AggValueSlot::IsCompleteObject); CGF.EmitAggExpr(Init, Slot); } @@ -589,7 +591,8 @@ static void EmitMemberInitializer(CodeGenFunction &CGF, // Copy the aggregate. CGF.EmitAggregateCopy(LHS.getAddress(), Src.getAddress(), FieldType, - LHS.isVolatileQualified()); + LHS.isVolatileQualified(), + /*destIsCompleteObject*/ true); return; } } @@ -1371,7 +1374,10 @@ CodeGenFunction::EmitDelegatingCXXConstructorCall(const CXXConstructorDecl *Ctor AggValueSlot::forAddr(ThisPtr, Alignment, Qualifiers(), AggValueSlot::IsDestructed, AggValueSlot::DoesNotNeedGCBarriers, - AggValueSlot::IsNotAliased); + AggValueSlot::IsNotAliased, + CurGD.getCtorType() == Ctor_Complete + ? AggValueSlot::IsCompleteObject + : AggValueSlot::IsNotCompleteObject); EmitAggExpr(Ctor->init_begin()[0]->getInit(), AggSlot); diff --git a/lib/CodeGen/CGDecl.cpp b/lib/CodeGen/CGDecl.cpp index 970f0b2389..fa95377ff6 100644 --- a/lib/CodeGen/CGDecl.cpp +++ b/lib/CodeGen/CGDecl.cpp @@ -1092,9 +1092,10 @@ void CodeGenFunction::EmitExprAsInit(const Expr *init, } else { // TODO: how can we delay here if D is captured by its initializer? EmitAggExpr(init, AggValueSlot::forLValue(lvalue, - AggValueSlot::IsDestructed, + AggValueSlot::IsDestructed, AggValueSlot::DoesNotNeedGCBarriers, - AggValueSlot::IsNotAliased)); + AggValueSlot::IsNotAliased, + AggValueSlot::IsCompleteObject)); MaybeEmitStdInitializerListCleanup(lvalue.getAddress(), init); } } diff --git a/lib/CodeGen/CGDeclCXX.cpp b/lib/CodeGen/CGDeclCXX.cpp index dc52b11279..dfbd411e88 100644 --- a/lib/CodeGen/CGDeclCXX.cpp +++ b/lib/CodeGen/CGDeclCXX.cpp @@ -46,9 +46,11 @@ static void EmitDeclInit(CodeGenFunction &CGF, const VarDecl &D, } else if (type->isAnyComplexType()) { CGF.EmitComplexExprIntoAddr(Init, DeclPtr, lv.isVolatile()); } else { - CGF.EmitAggExpr(Init, AggValueSlot::forLValue(lv,AggValueSlot::IsDestructed, - AggValueSlot::DoesNotNeedGCBarriers, - AggValueSlot::IsNotAliased)); + CGF.EmitAggExpr(Init, AggValueSlot::forLValue(lv, + AggValueSlot::IsDestructed, + AggValueSlot::DoesNotNeedGCBarriers, + AggValueSlot::IsNotAliased, + AggValueSlot::IsCompleteObject)); } } diff --git a/lib/CodeGen/CGException.cpp b/lib/CodeGen/CGException.cpp index 95e0030866..dcb921261a 100644 --- a/lib/CodeGen/CGException.cpp +++ b/lib/CodeGen/CGException.cpp @@ -373,8 +373,7 @@ static void EmitAnyExprToExn(CodeGenFunction &CGF, const Expr *e, // evaluated but before the exception is caught. But the best way // to handle that is to teach EmitAggExpr to do the final copy // differently if it can't be elided. - CGF.EmitAnyExprToMem(e, typedAddr, e->getType().getQualifiers(), - /*IsInit*/ true); + CGF.EmitAnyExprToMem(e, typedAddr, e->getType().getQualifiers()); // Deactivate the cleanup block. CGF.DeactivateCleanupBlock(cleanup, cast(typedAddr)); @@ -1048,7 +1047,8 @@ static void InitCatchParam(CodeGenFunction &CGF, if (!copyExpr) { llvm::Value *rawAdjustedExn = CallBeginCatch(CGF, Exn, true); llvm::Value *adjustedExn = CGF.Builder.CreateBitCast(rawAdjustedExn, PtrTy); - CGF.EmitAggregateCopy(ParamAddr, adjustedExn, CatchType); + CGF.EmitAggregateCopy(ParamAddr, adjustedExn, CatchType, + /*volatile*/ false, 0, /*destIsCompleteObject*/ true); return; } @@ -1076,7 +1076,8 @@ static void InitCatchParam(CodeGenFunction &CGF, AggValueSlot::forAddr(ParamAddr, Alignment, Qualifiers(), AggValueSlot::IsNotDestructed, AggValueSlot::DoesNotNeedGCBarriers, - AggValueSlot::IsNotAliased)); + AggValueSlot::IsNotAliased, + AggValueSlot::IsCompleteObject)); // Leave the terminate scope. CGF.EHStack.popTerminate(); diff --git a/lib/CodeGen/CGExpr.cpp b/lib/CodeGen/CGExpr.cpp index 511914884f..05348d1a41 100644 --- a/lib/CodeGen/CGExpr.cpp +++ b/lib/CodeGen/CGExpr.cpp @@ -133,17 +133,17 @@ RValue CodeGenFunction::EmitAnyExprToTemp(const Expr *E) { /// location. void CodeGenFunction::EmitAnyExprToMem(const Expr *E, llvm::Value *Location, - Qualifiers Quals, - bool IsInit) { + Qualifiers Quals) { // FIXME: This function should take an LValue as an argument. if (E->getType()->isAnyComplexType()) { EmitComplexExprIntoAddr(E, Location, Quals.hasVolatile()); } else if (hasAggregateLLVMType(E->getType())) { CharUnits Alignment = getContext().getTypeAlignInChars(E->getType()); EmitAggExpr(E, AggValueSlot::forAddr(Location, Alignment, Quals, - AggValueSlot::IsDestructed_t(IsInit), + AggValueSlot::IsDestructed, AggValueSlot::DoesNotNeedGCBarriers, - AggValueSlot::IsAliased_t(!IsInit))); + AggValueSlot::IsNotAliased, + AggValueSlot::IsCompleteObject)); } else { RValue RV = RValue::get(EmitScalarExpr(E, /*Ignore*/ false)); LValue LV = MakeAddrLValue(Location, E->getType()); @@ -366,7 +366,8 @@ EmitExprForReferenceBinding(CodeGenFunction &CGF, const Expr *E, AggSlot = AggValueSlot::forAddr(ReferenceTemporary, Alignment, Qualifiers(), isDestructed, AggValueSlot::DoesNotNeedGCBarriers, - AggValueSlot::IsNotAliased); + AggValueSlot::IsNotAliased, + AggValueSlot::IsCompleteObject); } if (InitializedDecl) { @@ -2151,8 +2152,7 @@ LValue CodeGenFunction::EmitCompoundLiteralLValue(const CompoundLiteralExpr *E){ const Expr *InitExpr = E->getInitializer(); LValue Result = MakeAddrLValue(DeclPtr, E->getType()); - EmitAnyExprToMem(InitExpr, DeclPtr, E->getType().getQualifiers(), - /*Init*/ true); + EmitAnyExprToMem(InitExpr, DeclPtr, E->getType().getQualifiers()); return Result; } @@ -2283,7 +2283,7 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) { // as a value, copy it into a temporary, and return an lvalue referring to // that temporary. llvm::Value *V = CreateMemTemp(E->getType(), "ref.temp"); - EmitAnyExprToMem(E, V, E->getType().getQualifiers(), false); + EmitAnyExprToMem(E, V, E->getType().getQualifiers()); return MakeAddrLValue(V, E->getType()); } @@ -2754,8 +2754,7 @@ EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, llvm::Value *Dest, static llvm::Value * EmitValToTemp(CodeGenFunction &CGF, Expr *E) { llvm::Value *DeclPtr = CGF.CreateMemTemp(E->getType(), ".atomictmp"); - CGF.EmitAnyExprToMem(E, DeclPtr, E->getType().getQualifiers(), - /*Init*/ true); + CGF.EmitAnyExprToMem(E, DeclPtr, E->getType().getQualifiers()); return DeclPtr; } diff --git a/lib/CodeGen/CGExprAgg.cpp b/lib/CodeGen/CGExprAgg.cpp index b6efc1cafa..8f0d1db353 100644 --- a/lib/CodeGen/CGExprAgg.cpp +++ b/lib/CodeGen/CGExprAgg.cpp @@ -179,7 +179,8 @@ public: void VisitVAArgExpr(VAArgExpr *E); - void EmitInitializationToLValue(Expr *E, LValue Address); + void EmitInitializationToLValue(Expr *E, LValue Address, + AggValueSlot::IsCompleteObject_t isCompleteObject); void EmitNullInitializationToLValue(LValue Address); // case Expr::ChooseExprClass: void VisitCXXThrowExpr(const CXXThrowExpr *E) { CGF.EmitCXXThrowExpr(E); } @@ -279,7 +280,7 @@ void AggExprEmitter::EmitFinalDestCopy(const Expr *E, RValue Src, bool Ignore, // is volatile, unless copy has volatile for both source and destination.. CGF.EmitAggregateCopy(Dest.getAddr(), Src.getAggregateAddr(), E->getType(), Dest.isVolatile()|Src.isVolatileQualified(), - Alignment); + Alignment, Dest.isCompleteObject()); } /// EmitFinalDestCopy - Perform the final copy to DestPtr, if desired. @@ -441,7 +442,8 @@ void AggExprEmitter::EmitArrayInit(llvm::Value *DestPtr, llvm::ArrayType *AType, EmitStdInitializerList(element, initList); } else { LValue elementLV = CGF.MakeAddrLValue(element, elementType); - EmitInitializationToLValue(E->getInit(i), elementLV); + EmitInitializationToLValue(E->getInit(i), elementLV, + AggValueSlot::IsCompleteObject); } } @@ -488,7 +490,8 @@ void AggExprEmitter::EmitArrayInit(llvm::Value *DestPtr, llvm::ArrayType *AType, // Emit the actual filler expression. LValue elementLV = CGF.MakeAddrLValue(currentElement, elementType); if (filler) - EmitInitializationToLValue(filler, elementLV); + EmitInitializationToLValue(filler, elementLV, + AggValueSlot::IsCompleteObject); else EmitNullInitializationToLValue(elementLV); @@ -567,7 +570,8 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) { llvm::Value *CastPtr = Builder.CreateBitCast(Dest.getAddr(), CGF.ConvertType(PtrTy)); EmitInitializationToLValue(E->getSubExpr(), - CGF.MakeAddrLValue(CastPtr, Ty)); + CGF.MakeAddrLValue(CastPtr, Ty), + Dest.isCompleteObject()); break; } @@ -675,6 +679,29 @@ void AggExprEmitter::VisitPointerToDataMemberBinaryOperator( EmitFinalDestCopy(E, LV); } +/// Quickly check whether the object looks like it might be a complete +/// object. +static AggValueSlot::IsCompleteObject_t isCompleteObject(const Expr *E) { + E = E->IgnoreParens(); + + QualType objectType; + if (const DeclRefExpr *DRE = dyn_cast(E)) { + objectType = DRE->getDecl()->getType(); + } else if (const MemberExpr *ME = dyn_cast(E)) { + objectType = ME->getMemberDecl()->getType(); + } else { + // Be conservative. + return AggValueSlot::MayNotBeCompleteObject; + } + + // The expression refers directly to some sort of object. + // If that object has reference type, be conservative. + if (objectType->isReferenceType()) + return AggValueSlot::MayNotBeCompleteObject; + + return AggValueSlot::IsCompleteObject; +} + void AggExprEmitter::VisitBinAssign(const BinaryOperator *E) { // For an assignment to work, the value on the right has // to be compatible with the value on the left. @@ -682,7 +709,8 @@ void AggExprEmitter::VisitBinAssign(const BinaryOperator *E) { E->getRHS()->getType()) && "Invalid assignment"); - if (const DeclRefExpr *DRE = dyn_cast(E->getLHS())) + if (const DeclRefExpr *DRE + = dyn_cast(E->getLHS()->IgnoreParens())) if (const VarDecl *VD = dyn_cast(DRE->getDecl())) if (VD->hasAttr() && E->getRHS()->HasSideEffects(CGF.getContext())) { @@ -692,18 +720,20 @@ void AggExprEmitter::VisitBinAssign(const BinaryOperator *E) { LValue LHS = CGF.EmitLValue(E->getLHS()); Dest = AggValueSlot::forLValue(LHS, AggValueSlot::IsDestructed, needsGC(E->getLHS()->getType()), - AggValueSlot::IsAliased); + AggValueSlot::IsAliased, + AggValueSlot::IsCompleteObject); EmitFinalDestCopy(E, RHS, true); return; } - + LValue LHS = CGF.EmitLValue(E->getLHS()); // Codegen the RHS so that it stores directly into the LHS. AggValueSlot LHSSlot = AggValueSlot::forLValue(LHS, AggValueSlot::IsDestructed, needsGC(E->getLHS()->getType()), - AggValueSlot::IsAliased); + AggValueSlot::IsAliased, + isCompleteObject(E->getLHS())); CGF.EmitAggExpr(E->getRHS(), LHSSlot, false); EmitFinalDestCopy(E, LHS, true); } @@ -836,7 +866,8 @@ static bool isSimpleZero(const Expr *E, CodeGenFunction &CGF) { void -AggExprEmitter::EmitInitializationToLValue(Expr* E, LValue LV) { +AggExprEmitter::EmitInitializationToLValue(Expr* E, LValue LV, + AggValueSlot::IsCompleteObject_t isCompleteObject) { QualType type = LV.getType(); // FIXME: Ignore result? // FIXME: Are initializers affected by volatile? @@ -854,6 +885,7 @@ AggExprEmitter::EmitInitializationToLValue(Expr* E, LValue LV) { AggValueSlot::IsDestructed, AggValueSlot::DoesNotNeedGCBarriers, AggValueSlot::IsNotAliased, + isCompleteObject, Dest.isZeroed())); } else if (LV.isSimple()) { CGF.EmitScalarInit(E, /*D=*/0, LV, /*Captured=*/false); @@ -969,7 +1001,8 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) { LValue FieldLoc = CGF.EmitLValueForFieldInitialization(DestPtr, Field, 0); if (NumInitElements) { // Store the initializer into the field - EmitInitializationToLValue(E->getInit(0), FieldLoc); + EmitInitializationToLValue(E->getInit(0), FieldLoc, + AggValueSlot::IsCompleteObject); } else { // Default-initialize to null. EmitNullInitializationToLValue(FieldLoc); @@ -1011,7 +1044,8 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) { if (curInitIndex < NumInitElements) { // Store the initializer into the field. - EmitInitializationToLValue(E->getInit(curInitIndex++), LV); + EmitInitializationToLValue(E->getInit(curInitIndex++), LV, + AggValueSlot::IsCompleteObject); } else { // We're out of initalizers; default-initialize to null EmitNullInitializationToLValue(LV); @@ -1186,105 +1220,106 @@ LValue CodeGenFunction::EmitAggExprToLValue(const Expr *E) { LValue LV = MakeAddrLValue(Temp, E->getType()); EmitAggExpr(E, AggValueSlot::forLValue(LV, AggValueSlot::IsNotDestructed, AggValueSlot::DoesNotNeedGCBarriers, - AggValueSlot::IsNotAliased)); + AggValueSlot::IsNotAliased, + AggValueSlot::IsCompleteObject)); return LV; } -void CodeGenFunction::EmitAggregateCopy(llvm::Value *DestPtr, - llvm::Value *SrcPtr, QualType Ty, - bool isVolatile, unsigned Alignment) { - assert(!Ty->isAnyComplexType() && "Shouldn't happen for complex"); +void CodeGenFunction::EmitAggregateCopy(llvm::Value *dest, llvm::Value *src, + QualType type, + bool isVolatile, unsigned alignment, + bool destIsCompleteObject) { + assert(!type->isAnyComplexType() && "Shouldn't happen for complex"); + + // Get size and alignment info for this type. Note that the type + // might include an alignment attribute, so we can't just rely on + // the layout. + // FIXME: Do we need to handle VLAs here? + std::pair typeInfo = + getContext().getTypeInfoInChars(type); + + // If we weren't given an alignment, use the natural alignment. + if (!alignment) alignment = typeInfo.second.getQuantity(); + CharUnits sizeToCopy = typeInfo.first; + + // There's some special logic that applies to C++ classes: if (getContext().getLangOpts().CPlusPlus) { - if (const RecordType *RT = Ty->getAs()) { - CXXRecordDecl *Record = cast(RT->getDecl()); - assert((Record->hasTrivialCopyConstructor() || - Record->hasTrivialCopyAssignment() || - Record->hasTrivialMoveConstructor() || - Record->hasTrivialMoveAssignment()) && + if (const RecordType *RT = type->getAs()) { + // First, we want to assert that we're not doing this to + // something with a non-trivial operator/constructor. + CXXRecordDecl *record = cast(RT->getDecl()); + assert((record->hasTrivialCopyConstructor() || + record->hasTrivialCopyAssignment() || + record->hasTrivialMoveConstructor() || + record->hasTrivialMoveAssignment()) && "Trying to aggregate-copy a type without a trivial copy " "constructor or assignment operator"); - // Ignore empty classes in C++. - if (Record->isEmpty()) + + // Second, we want to ignore empty classes. + if (record->isEmpty()) return; + + // Third, if it's possible that the destination might not be a + // complete object, then we need to make sure we only copy the + // data size, not the full sizeof, so that we don't overwrite + // subclass fields in the tailing padding. It's generally going + // to be more efficient to copy the sizeof, since we can use + // larger stores. + // + // Unions and final classes can never be base classes. + if (!destIsCompleteObject && !record->isUnion() && + !record->hasAttr()) { + const ASTRecordLayout &layout + = getContext().getASTRecordLayout(record); + sizeToCopy = layout.getDataSize(); + } } } - // Aggregate assignment turns into llvm.memcpy. This is almost valid per - // C99 6.5.16.1p3, which states "If the value being stored in an object is - // read from another object that overlaps in anyway the storage of the first - // object, then the overlap shall be exact and the two objects shall have - // qualified or unqualified versions of a compatible type." - // - // memcpy is not defined if the source and destination pointers are exactly - // equal, but other compilers do this optimization, and almost every memcpy - // implementation handles this case safely. If there is a libc that does not - // safely handle this, we can add a target hook. - - // Get size and alignment info for this aggregate. - std::pair TypeInfo = - getContext().getTypeInfoInChars(Ty); - - if (!Alignment) - Alignment = TypeInfo.second.getQuantity(); - - // FIXME: Handle variable sized types. - - // FIXME: If we have a volatile struct, the optimizer can remove what might - // appear to be `extra' memory ops: - // - // volatile struct { int i; } a, b; - // - // int main() { - // a = b; - // a = b; - // } - // - // we need to use a different call here. We use isVolatile to indicate when - // either the source or the destination is volatile. - - llvm::PointerType *DPT = cast(DestPtr->getType()); + llvm::PointerType *DPT = cast(dest->getType()); llvm::Type *DBP = llvm::Type::getInt8PtrTy(getLLVMContext(), DPT->getAddressSpace()); - DestPtr = Builder.CreateBitCast(DestPtr, DBP); + dest = Builder.CreateBitCast(dest, DBP); - llvm::PointerType *SPT = cast(SrcPtr->getType()); + llvm::PointerType *SPT = cast(src->getType()); llvm::Type *SBP = llvm::Type::getInt8PtrTy(getLLVMContext(), SPT->getAddressSpace()); - SrcPtr = Builder.CreateBitCast(SrcPtr, SBP); + src = Builder.CreateBitCast(src, SBP); + + llvm::Value *sizeVal = + llvm::ConstantInt::get(CGM.SizeTy, sizeToCopy.getQuantity()); // Don't do any of the memmove_collectable tests if GC isn't set. if (CGM.getLangOpts().getGC() == LangOptions::NonGC) { // fall through - } else if (const RecordType *RecordTy = Ty->getAs()) { - RecordDecl *Record = RecordTy->getDecl(); - if (Record->hasObjectMember()) { - CharUnits size = TypeInfo.first; - llvm::Type *SizeTy = ConvertType(getContext().getSizeType()); - llvm::Value *SizeVal = llvm::ConstantInt::get(SizeTy, size.getQuantity()); - CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this, DestPtr, SrcPtr, - SizeVal); + } else if (const RecordType *RT = type->getAs()) { + if (RT->getDecl()->hasObjectMember()) { + CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this, dest, src, sizeVal); return; } - } else if (Ty->isArrayType()) { - QualType BaseType = getContext().getBaseElementType(Ty); - if (const RecordType *RecordTy = BaseType->getAs()) { - if (RecordTy->getDecl()->hasObjectMember()) { - CharUnits size = TypeInfo.first; - llvm::Type *SizeTy = ConvertType(getContext().getSizeType()); - llvm::Value *SizeVal = - llvm::ConstantInt::get(SizeTy, size.getQuantity()); - CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this, DestPtr, SrcPtr, - SizeVal); + } else if (type->isArrayType()) { + QualType baseType = getContext().getBaseElementType(type); + if (const RecordType *RT = baseType->getAs()) { + if (RT->getDecl()->hasObjectMember()) { + CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this, dest, src,sizeVal); return; } } } + + // Aggregate assignment turns into llvm.memcpy. This is almost valid per + // C99 6.5.16.1p3, which states "If the value being stored in an object is + // read from another object that overlaps in anyway the storage of the first + // object, then the overlap shall be exact and the two objects shall have + // qualified or unqualified versions of a compatible type." + // + // memcpy is not defined if the source and destination pointers are exactly + // equal, but other compilers do this optimization, and almost every memcpy + // implementation handles this case safely. If there is a libc that does not + // safely handle this, we can add a target hook. - Builder.CreateMemCpy(DestPtr, SrcPtr, - llvm::ConstantInt::get(IntPtrTy, - TypeInfo.first.getQuantity()), - Alignment, isVolatile); + Builder.CreateMemCpy(dest, src, sizeVal, alignment, isVolatile); } void CodeGenFunction::MaybeEmitStdInitializerListCleanup(llvm::Value *loc, diff --git a/lib/CodeGen/CGExprCXX.cpp b/lib/CodeGen/CGExprCXX.cpp index d3ba770747..cefe7b64d0 100644 --- a/lib/CodeGen/CGExprCXX.cpp +++ b/lib/CodeGen/CGExprCXX.cpp @@ -781,7 +781,8 @@ static void StoreAnyExprIntoOneUnit(CodeGenFunction &CGF, const Expr *Init, = AggValueSlot::forAddr(NewPtr, Alignment, AllocType.getQualifiers(), AggValueSlot::IsDestructed, AggValueSlot::DoesNotNeedGCBarriers, - AggValueSlot::IsNotAliased); + AggValueSlot::IsNotAliased, + AggValueSlot::IsCompleteObject); CGF.EmitAggExpr(Init, Slot); CGF.MaybeEmitStdInitializerListCleanup(NewPtr, Init); diff --git a/lib/CodeGen/CGObjC.cpp b/lib/CodeGen/CGObjC.cpp index b18060c3c5..3f67562751 100644 --- a/lib/CodeGen/CGObjC.cpp +++ b/lib/CodeGen/CGObjC.cpp @@ -885,7 +885,8 @@ CodeGenFunction::generateObjCGetterBody(const ObjCImplementationDecl *classImpl, // The return value slot is guaranteed to not be aliased, but // that's not necessarily the same as "on the stack", so // we still potentially need objc_memmove_collectable. - EmitAggregateCopy(ReturnValue, LV.getAddress(), ivarType); + EmitAggregateCopy(ReturnValue, LV.getAddress(), ivarType, + /*volatile*/ false, 0, /*destIsCompleteObject*/ true); } else { llvm::Value *value; if (propType->isReferenceType()) { @@ -1309,7 +1310,8 @@ void CodeGenFunction::GenerateObjCCtorDtorMethod(ObjCImplementationDecl *IMP, EmitAggExpr(IvarInit->getInit(), AggValueSlot::forLValue(LV, AggValueSlot::IsDestructed, AggValueSlot::DoesNotNeedGCBarriers, - AggValueSlot::IsNotAliased)); + AggValueSlot::IsNotAliased, + AggValueSlot::IsCompleteObject)); } // constructor returns 'self'. CodeGenTypes &Types = CGM.getTypes(); @@ -2931,7 +2933,8 @@ CodeGenFunction::GenerateObjCAtomicGetterCopyHelperFunction( AggValueSlot::forAddr(DV.getScalarVal(), Alignment, Qualifiers(), AggValueSlot::IsDestructed, AggValueSlot::DoesNotNeedGCBarriers, - AggValueSlot::IsNotAliased)); + AggValueSlot::IsNotAliased, + AggValueSlot::IsCompleteObject)); FinishFunction(); HelperFn = llvm::ConstantExpr::getBitCast(Fn, VoidPtrTy); diff --git a/lib/CodeGen/CGStmt.cpp b/lib/CodeGen/CGStmt.cpp index 670167b959..489386dfa2 100644 --- a/lib/CodeGen/CGStmt.cpp +++ b/lib/CodeGen/CGStmt.cpp @@ -722,7 +722,8 @@ void CodeGenFunction::EmitReturnOfRValue(RValue RV, QualType Ty) { if (RV.isScalar()) { Builder.CreateStore(RV.getScalarVal(), ReturnValue); } else if (RV.isAggregate()) { - EmitAggregateCopy(ReturnValue, RV.getAggregateAddr(), Ty); + EmitAggregateCopy(ReturnValue, RV.getAggregateAddr(), Ty, + /*volatile*/ false, 0, /*destIsCompleteObject*/ true); } else { StoreComplexToAddr(RV.getComplexVal(), ReturnValue, false); } @@ -769,7 +770,8 @@ void CodeGenFunction::EmitReturnStmt(const ReturnStmt &S) { EmitAggExpr(RV, AggValueSlot::forAddr(ReturnValue, Alignment, Qualifiers(), AggValueSlot::IsDestructed, AggValueSlot::DoesNotNeedGCBarriers, - AggValueSlot::IsNotAliased)); + AggValueSlot::IsNotAliased, + AggValueSlot::IsCompleteObject)); } EmitBranchThroughCleanup(ReturnBlock); diff --git a/lib/CodeGen/CGValue.h b/lib/CodeGen/CGValue.h index ac704e7dca..731b6abcff 100644 --- a/lib/CodeGen/CGValue.h +++ b/lib/CodeGen/CGValue.h @@ -318,22 +318,22 @@ class AggValueSlot { // Qualifiers Qualifiers Quals; - unsigned short Alignment; + unsigned Alignment : 16; /// DestructedFlag - This is set to true if some external code is /// responsible for setting up a destructor for the slot. Otherwise /// the code which constructs it should push the appropriate cleanup. - bool DestructedFlag : 1; + unsigned DestructedFlag : 1; /// ObjCGCFlag - This is set to true if writing to the memory in the /// slot might require calling an appropriate Objective-C GC /// barrier. The exact interaction here is unnecessarily mysterious. - bool ObjCGCFlag : 1; + unsigned ObjCGCFlag : 1; /// ZeroedFlag - This is set to true if the memory in the slot is /// known to be zero before the assignment into it. This means that /// zero fields don't need to be set. - bool ZeroedFlag : 1; + unsigned ZeroedFlag : 1; /// AliasedFlag - This is set to true if the slot might be aliased /// and it's not undefined behavior to access it through such an @@ -347,19 +347,32 @@ class AggValueSlot { /// over. Since it's invalid in general to memcpy a non-POD C++ /// object, it's important that this flag never be set when /// evaluating an expression which constructs such an object. - bool AliasedFlag : 1; + unsigned AliasedFlag : 1; + + /// CompleteObjectFlag - This is set to true if the slot is known to + /// be a complete object. When emitting an aggregate copy of a + /// non-POD C++ struct to a location which may not be a complete + /// object, only the data size of the type can be copied in order to + /// prevent unrelated fields from being overwritten. + unsigned CompleteObjectFlag : 1; public: enum IsAliased_t { IsNotAliased, IsAliased }; enum IsDestructed_t { IsNotDestructed, IsDestructed }; enum IsZeroed_t { IsNotZeroed, IsZeroed }; + enum IsCompleteObject_t { + IsNotCompleteObject, + MayNotBeCompleteObject = IsNotCompleteObject, + IsCompleteObject + }; enum NeedsGCBarriers_t { DoesNotNeedGCBarriers, NeedsGCBarriers }; /// ignored - Returns an aggregate value slot indicating that the /// aggregate value is being ignored. static AggValueSlot ignored() { return forAddr(0, CharUnits(), Qualifiers(), IsNotDestructed, - DoesNotNeedGCBarriers, IsNotAliased); + DoesNotNeedGCBarriers, IsNotAliased, + IsCompleteObject); } /// forAddr - Make a slot for an aggregate value. @@ -377,6 +390,7 @@ public: IsDestructed_t isDestructed, NeedsGCBarriers_t needsGC, IsAliased_t isAliased, + IsCompleteObject_t isCompleteObject, IsZeroed_t isZeroed = IsNotZeroed) { AggValueSlot AV; AV.Addr = addr; @@ -386,15 +400,18 @@ public: AV.ObjCGCFlag = needsGC; AV.ZeroedFlag = isZeroed; AV.AliasedFlag = isAliased; + AV.CompleteObjectFlag = isCompleteObject; return AV; } static AggValueSlot forLValue(LValue LV, IsDestructed_t isDestructed, NeedsGCBarriers_t needsGC, IsAliased_t isAliased, + IsCompleteObject_t isCompleteObject, IsZeroed_t isZeroed = IsNotZeroed) { return forAddr(LV.getAddress(), LV.getAlignment(), - LV.getQuals(), isDestructed, needsGC, isAliased, isZeroed); + LV.getQuals(), isDestructed, needsGC, isAliased, + isCompleteObject, isZeroed); } IsDestructed_t isExternallyDestructed() const { @@ -434,6 +451,10 @@ public: return IsAliased_t(AliasedFlag); } + IsCompleteObject_t isCompleteObject() const { + return IsCompleteObject_t(CompleteObjectFlag); + } + // FIXME: Alignment? RValue asRValue() const { return RValue::getAggregate(getAddr(), isVolatile()); diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h index 85cbd143d8..9acfc5e93c 100644 --- a/lib/CodeGen/CodeGenFunction.h +++ b/lib/CodeGen/CodeGenFunction.h @@ -1596,7 +1596,9 @@ public: T.getQualifiers(), AggValueSlot::IsNotDestructed, AggValueSlot::DoesNotNeedGCBarriers, - AggValueSlot::IsNotAliased); + AggValueSlot::IsNotAliased, + AggValueSlot::IsCompleteObject, + AggValueSlot::IsNotZeroed); } /// Emit a cast to void* in the appropriate address space. @@ -1628,9 +1630,10 @@ public: RValue EmitAnyExprToTemp(const Expr *E); /// EmitAnyExprToMem - Emits the code necessary to evaluate an - /// arbitrary expression into the given memory location. + /// arbitrary expression as an initialization of the given memory + /// location. void EmitAnyExprToMem(const Expr *E, llvm::Value *Location, - Qualifiers Quals, bool IsInitializer); + Qualifiers Quals); /// EmitExprAsInit - Emits the code necessary to initialize a /// location in memory with the given initializer. @@ -1641,9 +1644,12 @@ public: /// /// \param isVolatile - True iff either the source or the destination is /// volatile. + /// \param destIsCompleteObject - True if the destination is known to be + /// a complete object. void EmitAggregateCopy(llvm::Value *DestPtr, llvm::Value *SrcPtr, QualType EltTy, bool isVolatile=false, - unsigned Alignment = 0); + unsigned alignment = 0, + bool destIsCompleteObject = false); /// StartBlock - Start new block named N. If insert block is a dummy block /// then reuse it. diff --git a/test/CodeGenCXX/assign-operator.cpp b/test/CodeGenCXX/assign-operator.cpp index e19df272c9..00058a2ed5 100644 --- a/test/CodeGenCXX/assign-operator.cpp +++ b/test/CodeGenCXX/assign-operator.cpp @@ -28,3 +28,24 @@ namespace test1 { A a; } + +// PR12204 +namespace test2 { + struct A { + A() {} // make this non-POD to enable tail layout + void *ptr; + char c; + }; + + void test(A &out) { + out = A(); + } +} +// CHECK: define void @_ZN5test24testERNS_1AE( +// CHECK: [[OUT:%.*]] = alloca [[A:%.*]]*, align 8 +// CHECK-NEXT: [[TMP:%.*]] = alloca [[A]], align 8 +// CHECK: [[REF:%.*]] = load [[A]]** [[OUT]], align 8 +// CHECK-NEXT: call void @_ZN5test21AC1Ev([[A]]* [[TMP]]) +// CHECK-NEXT: [[T0:%.*]] = bitcast [[A]]* [[REF]] to i8* +// CHECK-NEXT: [[T1:%.*]] = bitcast [[A]]* [[TMP]] to i8* +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[T0]], i8* [[T1]], i64 9, i32 8, i1 false) -- 2.40.0