From: Chris Lattner Date: Tue, 29 Jun 2010 00:06:42 +0000 (+0000) Subject: make the argument passing stuff in the FCA case smarter still, by X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=309c59f6d3a4fd883fdf87334271df2c55338aae;p=clang make the argument passing stuff in the FCA case smarter still, by avoiding making the FCA at all when the types exactly line up. For example, before we made: %struct.DeclGroup = type { i64, i64 } define i64 @_Z3foo9DeclGroup(i64, i64) nounwind { entry: %D = alloca %struct.DeclGroup, align 8 ; <%struct.DeclGroup*> [#uses=3] %2 = insertvalue %struct.DeclGroup undef, i64 %0, 0 ; <%struct.DeclGroup> [#uses=1] %3 = insertvalue %struct.DeclGroup %2, i64 %1, 1 ; <%struct.DeclGroup> [#uses=1] store %struct.DeclGroup %3, %struct.DeclGroup* %D %tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1] %tmp1 = load i64* %tmp ; <i64> [#uses=1] %tmp2 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 1 ; <i64*> [#uses=1] %tmp3 = load i64* %tmp2 ; <i64> [#uses=1] %add = add nsw i64 %tmp1, %tmp3 ; <i64> [#uses=1] ret i64 %add } ... which has the pointless insertvalue, which fastisel hates, now we make: %struct.DeclGroup = type { i64, i64 } define i64 @_Z3foo9DeclGroup(i64, i64) nounwind { entry: %D = alloca %struct.DeclGroup, align 8 ; <%struct.DeclGroup*> [#uses=4] %2 = getelementptr %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1] store i64 %0, i64* %2 %3 = getelementptr %struct.DeclGroup* %D, i32 0, i32 1 ; <i64*> [#uses=1] store i64 %1, i64* %3 %tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1] %tmp1 = load i64* %tmp ; <i64> [#uses=1] %tmp2 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 1 ; <i64*> [#uses=1] %tmp3 = load i64* %tmp2 ; <i64> [#uses=1] %add = add nsw i64 %tmp1, %tmp3 ; <i64> [#uses=1] ret i64 %add } This only kicks in when x86-64 abi lowering decides it likes us.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@107104 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/CGCall.cpp b/lib/CodeGen/CGCall.cpp index eb517edd81..4d72d91cb7 100644 --- a/lib/CodeGen/CGCall.cpp +++ b/lib/CodeGen/CGCall.cpp @@ -894,29 +894,41 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, continue; case ABIArgInfo::Coerce: { + // FIXME: This is very wasteful; EmitParmDecl is just going to drop the + // result in a new alloca anyway, so we could just store into that + // directly if we broke the abstraction down more. + llvm::Value *V = CreateMemTemp(Ty, "coerce"); + // If the coerce-to type is a first class aggregate, we flatten it and // pass the elements. Either way is semantically identical, but fast-isel // and the optimizer generally likes scalar values better than FCAs. - llvm::Value *FormalArg; if (const llvm::StructType *STy = dyn_cast<llvm::StructType>(ArgI.getCoerceToType())) { - // Reconstruct the FCA here. - // FIXME: If we have a direct match, do nice gep/store series. - FormalArg = llvm::UndefValue::get(STy); - for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { - assert(AI != Fn->arg_end() && "Argument mismatch!"); - FormalArg = Builder.CreateInsertValue(FormalArg, AI++, i); + // If the argument and alloca types match up, we don't have to build the + // FCA at all, emit a series of GEPs and stores, which is better for + // fast isel. + if (STy == cast<llvm::PointerType>(V->getType())->getElementType()) { + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { + assert(AI != Fn->arg_end() && "Argument mismatch!"); + llvm::Value *EltPtr = Builder.CreateConstGEP2_32(V, 0, i); + Builder.CreateStore(AI++, EltPtr); + } + } else { + // Reconstruct the FCA here so we can do a coerced store.
+ llvm::Value *FormalArg = llvm::UndefValue::get(STy); + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { + assert(AI != Fn->arg_end() && "Argument mismatch!"); + FormalArg = Builder.CreateInsertValue(FormalArg, AI++, i); + } + CreateCoercedStore(FormalArg, V, /*DestIsVolatile=*/false, *this); } } else { + // Simple case, just do a coerced store of the argument into the alloca. assert(AI != Fn->arg_end() && "Argument mismatch!"); - FormalArg = AI++; + CreateCoercedStore(AI++, V, /*DestIsVolatile=*/false, *this); } - // FIXME: This is very wasteful; EmitParmDecl is just going to drop the - // result in a new alloca anyway, so we could just store into that - // directly if we broke the abstraction down more. - llvm::Value *V = CreateMemTemp(Ty, "coerce"); - CreateCoercedStore(FormalArg, V, /*DestIsVolatile=*/false, *this); + // Match to what EmitParmDecl is expecting for this type. if (!CodeGenFunction::hasAggregateLLVMType(Ty)) { V = EmitLoadOfScalar(V, false, Ty); @@ -1116,19 +1128,32 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, } else SrcPtr = RV.getAggregateAddr(); - llvm::Value *SrcVal = - CreateCoercedLoad(SrcPtr, ArgInfo.getCoerceToType(), *this); - // If the coerce-to type is a first class aggregate, we flatten it and // pass the elements. Either way is semantically identical, but fast-isel // and the optimizer generally likes scalar values better than FCAs. if (const llvm::StructType *STy = - dyn_cast<llvm::StructType>(SrcVal->getType())) { - // Extract the elements of the value to pass in. - for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) - Args.push_back(Builder.CreateExtractValue(SrcVal, i)); + dyn_cast<llvm::StructType>(ArgInfo.getCoerceToType())) { + // If the argument and alloca types match up, we don't have to build the + // FCA at all, emit a series of GEPs and loads, which is better for + // fast isel.
+ if (STy ==cast<llvm::PointerType>(SrcPtr->getType())->getElementType()){ + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { + llvm::Value *EltPtr = Builder.CreateConstGEP2_32(SrcPtr, 0, i); + Args.push_back(Builder.CreateLoad(EltPtr)); + } + } else { + // Otherwise, do a coerced load of the entire FCA and handle the pieces. + llvm::Value *SrcVal = + CreateCoercedLoad(SrcPtr, ArgInfo.getCoerceToType(), *this); + + // Extract the elements of the value to pass in. + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) + Args.push_back(Builder.CreateExtractValue(SrcVal, i)); + } } else { - Args.push_back(SrcVal); + // In the simple case, just pass the coerced loaded value. + Args.push_back(CreateCoercedLoad(SrcPtr, ArgInfo.getCoerceToType(), + *this)); } break;