From: Chris Lattner
Date: Sun, 27 Jun 2010 05:56:15 +0000 (+0000)
Subject: improve CreateCoercedLoad a bit to generate slightly less awful
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=08dd2a0e4c6b3c7cb601e5053eb02cb7d084f87e;p=clang

improve CreateCoercedLoad a bit to generate slightly less awful
IR when handling X86-64 by-value struct stuff. For example, we
used to compile this:

struct DeclGroup {
  unsigned NumDecls;
};

int foo(DeclGroup D);
void bar(DeclGroup *D) {
  foo(*D);
}

into:

define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) ssp nounwind {
entry:
  %D.addr = alloca %struct.DeclGroup*, align 8    ; <%struct.DeclGroup**> [#uses=2]
  %agg.tmp = alloca %struct.DeclGroup, align 4    ; <%struct.DeclGroup*> [#uses=2]
  %tmp3 = alloca i64                              ; <i64*> [#uses=2]
  store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr
  %tmp = load %struct.DeclGroup** %D.addr         ; <%struct.DeclGroup*> [#uses=1]
  %tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1]
  %tmp2 = bitcast %struct.DeclGroup* %tmp to i8*  ; <i8*> [#uses=1]
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false)
  %0 = bitcast i64* %tmp3 to %struct.DeclGroup*   ; <%struct.DeclGroup*> [#uses=1]
  %1 = load %struct.DeclGroup* %agg.tmp           ; <%struct.DeclGroup> [#uses=1]
  store %struct.DeclGroup %1, %struct.DeclGroup* %0, align 1
  %2 = load i64* %tmp3                            ; <i64> [#uses=1]
  call void @_Z3foo9DeclGroup(i64 %2)
  ret void
}

which would cause fastisel to bail out due to the first-class aggregate
load %1. With this patch we now compile it into the (still awful):

define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) nounwind ssp noredzone {
entry:
  %D.addr = alloca %struct.DeclGroup*, align 8    ; <%struct.DeclGroup**> [#uses=2]
  %agg.tmp = alloca %struct.DeclGroup, align 4    ; <%struct.DeclGroup*> [#uses=2]
  %tmp3 = alloca i64                              ; <i64*> [#uses=2]
  store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr
  %tmp = load %struct.DeclGroup** %D.addr         ; <%struct.DeclGroup*> [#uses=1]
  %tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1]
  %tmp2 = bitcast %struct.DeclGroup* %tmp to i8*  ; <i8*> [#uses=1]
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false)
  %coerce.dive = getelementptr %struct.DeclGroup* %agg.tmp, i32 0, i32 0 ; <i32*> [#uses=1]
  %0 = bitcast i64* %tmp3 to i32*                 ; <i32*> [#uses=1]
  %1 = load i32* %coerce.dive                     ; <i32> [#uses=1]
  store i32 %1, i32* %0, align 1
  %2 = load i64* %tmp3                            ; <i64> [#uses=1]
  %call = call i32 @_Z3foo9DeclGroup(i64 %2) noredzone ; <i32> [#uses=0]
  ret void
}

which doesn't bail out. On CGStmt.ll, this reduces fastisel bail-outs
from 958 to 935, and is the precursor of better things to come.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@106973 91177308-0d34-0410-b5e6-96231b3b80d8
---
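On x86-64, a 4-byte struct like DeclGroup is classified INTEGER and is
passed in a register as an i64, which is why both IR dumps above funnel
the struct's bytes through an i64 alloca. A rough stand-alone model of
that caller-side setup (hypothetical names, not the LLVM API; it only
shows the byte-level effect):

  #include <cstdint>
  #include <cstring>

  struct DeclGroup { unsigned NumDecls; };   // 4 bytes in memory

  // Copy the struct's bytes into the low bytes of an 8-byte slot (the
  // %tmp3 alloca) and pass the slot as the i64 argument. The old IR got
  // here via a first-class aggregate load/store; the new IR moves a
  // single i32 through the %coerce.dive pointer instead.
  uint64_t coerceToI64(const DeclGroup &D) {
    uint64_t Slot = 0;
    std::memcpy(&Slot, &D, sizeof(DeclGroup));
    return Slot;
  }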
diff --git a/lib/CodeGen/CGCall.cpp b/lib/CodeGen/CGCall.cpp
index 051ef08699..7ec5e303dc 100644
--- a/lib/CodeGen/CGCall.cpp
+++ b/lib/CodeGen/CGCall.cpp
@@ -345,6 +345,41 @@ CodeGenFunction::ExpandTypeToArgs(QualType Ty, RValue RV,
   }
 }
 
+/// EnterStructPointerForCoercedLoad - Given a pointer to a struct where we are
+/// accessing some number of bytes out of it, try to gep into the struct to get
+/// at its inner goodness.  Dive as deep as possible without entering an element
+/// with an in-memory size smaller than DstSize.
+static llvm::Value *
+EnterStructPointerForCoercedLoad(llvm::Value *SrcPtr,
+                                 const llvm::StructType *SrcSTy,
+                                 uint64_t DstSize, CodeGenFunction &CGF) {
+  // We can't dive into a zero-element struct.
+  if (SrcSTy->getNumElements() == 0) return SrcPtr;
+
+  const llvm::Type *FirstElt = SrcSTy->getElementType(0);
+
+  // If the first elt is at least as large as what we're looking for, or if the
+  // first element is the same size as the whole struct, we can enter it.
+  uint64_t FirstEltSize =
+    CGF.CGM.getTargetData().getTypeAllocSize(FirstElt);
+  if (FirstEltSize < DstSize &&
+      FirstEltSize < CGF.CGM.getTargetData().getTypeAllocSize(SrcSTy))
+    return SrcPtr;
+
+  // GEP into the first element.
+  SrcPtr = CGF.Builder.CreateConstGEP2_32(SrcPtr, 0, 0, "coerce.dive");
+
+  // If the first element is a struct, recurse.
+  const llvm::Type *SrcTy =
+    cast<llvm::PointerType>(SrcPtr->getType())->getElementType();
+  if (const llvm::StructType *SrcSTy = dyn_cast<llvm::StructType>(SrcTy))
+    return EnterStructPointerForCoercedLoad(SrcPtr, SrcSTy, DstSize, CGF);
+
+  return SrcPtr;
+}
+
+
+
 /// CreateCoercedLoad - Create a load from \arg SrcPtr interpreted as
 /// a pointer to an object of type \arg Ty.
 ///
@@ -356,8 +391,14 @@ static llvm::Value *CreateCoercedLoad(llvm::Value *SrcPtr,
                                       CodeGenFunction &CGF) {
   const llvm::Type *SrcTy =
     cast<llvm::PointerType>(SrcPtr->getType())->getElementType();
-  uint64_t SrcSize = CGF.CGM.getTargetData().getTypeAllocSize(SrcTy);
   uint64_t DstSize = CGF.CGM.getTargetData().getTypeAllocSize(Ty);
+
+  if (const llvm::StructType *SrcSTy = dyn_cast<llvm::StructType>(SrcTy)) {
+    SrcPtr = EnterStructPointerForCoercedLoad(SrcPtr, SrcSTy, DstSize, CGF);
+    SrcTy = cast<llvm::PointerType>(SrcPtr->getType())->getElementType();
+  }
+
+  uint64_t SrcSize = CGF.CGM.getTargetData().getTypeAllocSize(SrcTy);
 
   // If load is legal, just bitcast the src pointer.
   if (SrcSize >= DstSize) {
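To make the dive's termination rule concrete, here is a minimal
stand-alone sketch of the same condition (hypothetical Ty type and
names; not the LLVM API): keep entering the first element while it is
at least DstSize bytes or spans the whole struct, and stop as soon as
it would be too small.

  #include <cstdint>
  #include <vector>

  struct Ty {
    uint64_t Size;            // in-memory (alloc) size in bytes
    std::vector<Ty> Elts;     // empty for scalar types
  };

  const Ty *enterForCoercedLoad(const Ty *Src, uint64_t DstSize) {
    // A scalar or zero-element struct has nothing to dive into.
    while (!Src->Elts.empty()) {
      const Ty &First = Src->Elts.front();
      // Mirrors the patch's condition: stop if the first element is
      // smaller than both the load we need and the struct it lives in.
      if (First.Size < DstSize && First.Size < Src->Size)
        break;
      Src = &First;           // one "coerce.dive" step
    }
    return Src;
  }

For the DeclGroup call above, DstSize is 8 (the i64) and the lone i32
element is smaller than that, but its alloc size equals the struct's,
so the dive enters it; that is exactly where the %coerce.dive GEP in
the new IR comes from.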