From 6cacae8bf9597b8124cd40aedc189c04484e1990 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sun, 30 Sep 2012 12:43:37 +0000 Subject: [PATCH] CodeGen: Copy tail padding when we're not dealing with a trivial copy assign or move assign operator. This fixes a regression from r162254, the optimizer has problems reasoning about the smaller memcpy as it's often not safe to widen a store but making it smaller is. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@164917 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/CGExprAgg.cpp | 13 ++- lib/CodeGen/CGExprCXX.cpp | 4 +- lib/CodeGen/CodeGenFunction.h | 15 +++- test/CodeGenCXX/assign-construct-memcpy.cpp | 89 +++++++++++++++++++++ 4 files changed, 114 insertions(+), 7 deletions(-) create mode 100644 test/CodeGenCXX/assign-construct-memcpy.cpp diff --git a/lib/CodeGen/CGExprAgg.cpp b/lib/CodeGen/CGExprAgg.cpp index c9ffc19a8b..200b43aa6d 100644 --- a/lib/CodeGen/CGExprAgg.cpp +++ b/lib/CodeGen/CGExprAgg.cpp @@ -1274,7 +1274,8 @@ LValue CodeGenFunction::EmitAggExprToLValue(const Expr *E) { void CodeGenFunction::EmitAggregateCopy(llvm::Value *DestPtr, llvm::Value *SrcPtr, QualType Ty, bool isVolatile, - CharUnits alignment) { + CharUnits alignment, + bool isAssignment) { assert(!Ty->isAnyComplexType() && "Shouldn't happen for complex"); if (getContext().getLangOpts().CPlusPlus) { @@ -1303,9 +1304,13 @@ void CodeGenFunction::EmitAggregateCopy(llvm::Value *DestPtr, // implementation handles this case safely. If there is a libc that does not // safely handle this, we can add a target hook. - // Get data size and alignment info for this aggregate. - std::pair TypeInfo = - getContext().getTypeInfoDataSizeInChars(Ty); + // Get data size and alignment info for this aggregate. If this is an + // assignment don't copy the tail padding. Otherwise copying it is fine. + std::pair TypeInfo; + if (isAssignment) + TypeInfo = getContext().getTypeInfoDataSizeInChars(Ty); + else + TypeInfo = getContext().getTypeInfoInChars(Ty); if (alignment.isZero()) alignment = TypeInfo.second; diff --git a/lib/CodeGen/CGExprCXX.cpp b/lib/CodeGen/CGExprCXX.cpp index 7276440a47..16311963b3 100644 --- a/lib/CodeGen/CGExprCXX.cpp +++ b/lib/CodeGen/CGExprCXX.cpp @@ -241,7 +241,7 @@ RValue CodeGenFunction::EmitCXXMemberCallExpr(const CXXMemberCallExpr *CE, // We don't like to generate the trivial copy/move assignment operator // when it isn't necessary; just produce the proper effect here. llvm::Value *RHS = EmitLValue(*CE->arg_begin()).getAddress(); - EmitAggregateCopy(This, RHS, CE->getType()); + EmitAggregateAssign(This, RHS, CE->getType()); return RValue::get(This); } @@ -378,7 +378,7 @@ CodeGenFunction::EmitCXXOperatorMemberCallExpr(const CXXOperatorCallExpr *E, MD->isTrivial()) { llvm::Value *Src = EmitLValue(E->getArg(1)).getAddress(); QualType Ty = E->getType(); - EmitAggregateCopy(This, Src, Ty); + EmitAggregateAssign(This, Src, Ty); return RValue::get(This); } diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h index fc930ec158..4a0c35bd82 100644 --- a/lib/CodeGen/CodeGenFunction.h +++ b/lib/CodeGen/CodeGenFunction.h @@ -1653,13 +1653,26 @@ public: void EmitExprAsInit(const Expr *init, const ValueDecl *D, LValue lvalue, bool capturedByInit); + /// EmitAggregateCopy - Emit an aggrate assignment. + /// + /// The difference to EmitAggregateCopy is that tail padding is not copied. + /// This is required for correctness when assigning non-POD structures in C++. + void EmitAggregateAssign(llvm::Value *DestPtr, llvm::Value *SrcPtr, + QualType EltTy, bool isVolatile=false, + CharUnits Alignment = CharUnits::Zero()) { + EmitAggregateCopy(DestPtr, SrcPtr, EltTy, isVolatile, Alignment, true); + } + /// EmitAggregateCopy - Emit an aggrate copy. /// /// \param isVolatile - True iff either the source or the destination is /// volatile. + /// \param isAssignment - If false, allow padding to be copied. This often + /// yields more efficient. void EmitAggregateCopy(llvm::Value *DestPtr, llvm::Value *SrcPtr, QualType EltTy, bool isVolatile=false, - CharUnits Alignment = CharUnits::Zero()); + CharUnits Alignment = CharUnits::Zero(), + bool isAssignment = false); /// StartBlock - Start new block named N. If insert block is a dummy block /// then reuse it. diff --git a/test/CodeGenCXX/assign-construct-memcpy.cpp b/test/CodeGenCXX/assign-construct-memcpy.cpp new file mode 100644 index 0000000000..3d42051324 --- /dev/null +++ b/test/CodeGenCXX/assign-construct-memcpy.cpp @@ -0,0 +1,89 @@ +// RUN: %clang_cc1 -triple x86_64-apple-darwin12 -emit-llvm -o - -std=c++11 %s -DPOD | FileCheck %s -check-prefix=CHECK-POD +// RUN: %clang_cc1 -triple x86_64-apple-darwin12 -emit-llvm -o - -std=c++11 %s | FileCheck %s -check-prefix=CHECK-NONPOD + +// Declare the reserved placement operators. +typedef __typeof__(sizeof(0)) size_t; +void *operator new(size_t, void*) throw(); +void operator delete(void*, void*) throw(); +void *operator new[](size_t, void*) throw(); +void operator delete[](void*, void*) throw(); +template T &&move(T&); + +struct foo { +#ifndef POD + foo() {} // non-POD +#endif + void *a, *b; + bool c; +}; + +// It is not legal to copy the tail padding in all cases, but if it is it can +// yield better codegen. + +foo *test1(void *f, const foo &x) { + return new (f) foo(x); +// CHECK-POD: test1 +// CHECK-POD: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 24, i32 8 + +// CHECK-NONPOD: test1 +// CHECK-NONPOD: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 24, i32 8 +} + +foo *test2(const foo &x) { + return new foo(x); +// CHECK-POD: test2 +// CHECK-POD: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 24, i32 8 + +// CHECK-NONPOD: test2 +// CHECK-NONPOD: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 24, i32 8 +} + +foo test3(const foo &x) { + foo f = x; + return f; +// CHECK-POD: test3 +// CHECK-POD: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 24, i32 8 + +// CHECK-NONPOD: test3 +// CHECK-NONPOD: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 24, i32 8 +} + +foo *test4(foo &&x) { + return new foo(x); +// CHECK-POD: test4 +// CHECK-POD: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 24, i32 8 + +// CHECK-NONPOD: test4 +// CHECK-NONPOD: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 24, i32 8 +} + +void test5(foo &f, const foo &x) { + f = x; +// CHECK-POD: test5 +// CHECK-POD: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 24, i32 8 + +// CHECK-NONPOD: test5 +// CHECK-NONPOD: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 17, i32 8 +} + +extern foo globtest; + +void test6(foo &&x) { + globtest = move(x); +// CHECK-POD: test6 +// CHECK-POD: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 24, i32 8 + +// CHECK-NONPOD: test6 +// CHECK-NONPOD: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 17, i32 8 +} + +void byval(foo f); + +void test7(const foo &x) { + byval(x); +// CHECK-POD: test7 +// CHECK-POD: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 24, i32 8 + +// CHECK-NONPOD: test7 +// CHECK-NONPOD: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 24, i32 8 +} -- 2.40.0