From: Eli Friedman
Date: Wed, 12 Jun 2013 00:13:45 +0000 (+0000)
Subject: Make va_arg and argument passing to varargs functions work correctly with
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=7a1b586a383622e3287a5f3d82736ec513032744;p=clang

Make va_arg and argument passing to varargs functions work correctly with
AVX vectors when AVX is turned on.

Fixes .

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@183813 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp
index 179fdb3a92..981a38d7ff 100644
--- a/lib/CodeGen/TargetInfo.cpp
+++ b/lib/CodeGen/TargetInfo.cpp
@@ -1128,6 +1128,9 @@ class X86_64ABIInfo : public ABIInfo {
   /// containing object.  Some parameters are classified different
   /// depending on whether they straddle an eightbyte boundary.
   ///
+  /// \param isNamedArg - Whether the argument in question is a "named"
+  /// argument, as used in AMD64-ABI 3.5.7.
+  ///
   /// If a word is unused its result will be NoClass; if a type should
   /// be passed in Memory then at least the classification of \arg Lo
   /// will be Memory.
@@ -1136,7 +1139,8 @@
   ///
   /// If the \arg Lo class is ComplexX87, then the \arg Hi class will
   /// also be ComplexX87.
-  void classify(QualType T, uint64_t OffsetBase, Class &Lo, Class &Hi) const;
+  void classify(QualType T, uint64_t OffsetBase, Class &Lo, Class &Hi,
+                bool isNamedArg) const;
 
   llvm::Type *GetByteVectorType(QualType Ty) const;
   llvm::Type *GetSSETypeAtOffset(llvm::Type *IRType,
@@ -1162,7 +1166,8 @@
 
   ABIArgInfo classifyArgumentType(QualType Ty, unsigned freeIntRegs,
                                   unsigned &neededInt,
-                                  unsigned &neededSSE) const;
+                                  unsigned &neededSSE,
+                                  bool isNamedArg) const;
 
   bool IsIllegalVectorType(QualType Ty) const;
 
@@ -1189,7 +1194,8 @@ public:
   bool isPassedUsingAVXType(QualType type) const {
     unsigned neededInt, neededSSE;
     // The freeIntRegs argument doesn't matter here.
-    ABIArgInfo info = classifyArgumentType(type, 0, neededInt, neededSSE);
+    ABIArgInfo info = classifyArgumentType(type, 0, neededInt, neededSSE,
+                                           /*isNamedArg*/true);
     if (info.isDirect()) {
       llvm::Type *ty = info.getCoerceToType();
       if (llvm::VectorType *vectorTy = dyn_cast_or_null<llvm::VectorType>(ty))
@@ -1411,7 +1417,7 @@ X86_64ABIInfo::Class X86_64ABIInfo::merge(Class Accum, Class Field) {
 }
 
 void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase,
-                             Class &Lo, Class &Hi) const {
+                             Class &Lo, Class &Hi, bool isNamedArg) const {
   // FIXME: This code can be simplified by introducing a simple value class for
   // Class pairs with appropriate constructor methods for the various
   // situations.
@@ -1450,7 +1456,7 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase,
 
   if (const EnumType *ET = Ty->getAs<EnumType>()) {
     // Classify the underlying integer type.
-    classify(ET->getDecl()->getIntegerType(), OffsetBase, Lo, Hi);
+    classify(ET->getDecl()->getIntegerType(), OffsetBase, Lo, Hi, isNamedArg);
     return;
   }
 
@@ -1498,7 +1504,7 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase,
       // split.
       if (OffsetBase && OffsetBase != 64)
         Hi = Lo;
-    } else if (Size == 128 || (HasAVX && Size == 256)) {
+    } else if (Size == 128 || (HasAVX && isNamedArg && Size == 256)) {
       // Arguments of 256-bits are split into four eightbyte chunks. The
       // least significant one belongs to class SSE and all the others to class
       // SSEUP. The original Lo and Hi design considers that types can't be
@@ -1506,6 +1512,10 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase,
       // This design isn't correct for 256-bits, but since there're no cases
       // where the upper parts would need to be inspected, avoid adding
       // complexity and just consider Hi to match the 64-256 part.
+      //
+      // Note that per 3.5.7 of AMD64-ABI, 256-bit args are only passed in
+      // registers if they are "named", i.e. not part of the "..." of a
+      // variadic function.
       Lo = SSE;
       Hi = SSEUp;
     }
@@ -1571,7 +1581,7 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase,
 
     for (uint64_t i=0, Offset=OffsetBase; i<ArraySize; ++i, Offset += EltSize) {
       Class FieldLo, FieldHi;
-      classify(AT->getElementType(), Offset, FieldLo, FieldHi);
+      classify(AT->getElementType(), Offset, FieldLo, FieldHi, isNamedArg);
       Lo = merge(Lo, FieldLo);
       Hi = merge(Hi, FieldHi);
       if (Lo == Memory || Hi == Memory)
@@ -1625,7 +1635,7 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase,
       Class FieldLo, FieldHi;
       uint64_t Offset =
         OffsetBase + getContext().toBits(Layout.getBaseClassOffset(Base));
-      classify(i->getType(), Offset, FieldLo, FieldHi);
+      classify(i->getType(), Offset, FieldLo, FieldHi, isNamedArg);
       Lo = merge(Lo, FieldLo);
       Hi = merge(Hi, FieldHi);
       if (Lo == Memory || Hi == Memory)
@@ -1688,7 +1698,7 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase,
           FieldHi = EB_Hi ? Integer : NoClass;
         }
       } else
-        classify(i->getType(), Offset, FieldLo, FieldHi);
+        classify(i->getType(), Offset, FieldLo, FieldHi, isNamedArg);
       Lo = merge(Lo, FieldLo);
       Hi = merge(Hi, FieldHi);
       if (Lo == Memory || Hi == Memory)
@@ -2076,7 +2086,7 @@ classifyReturnType(QualType RetTy) const {
   // AMD64-ABI 3.2.3p4: Rule 1. Classify the return type with the
   // classification algorithm.
   X86_64ABIInfo::Class Lo, Hi;
-  classify(RetTy, 0, Lo, Hi);
+  classify(RetTy, 0, Lo, Hi, /*isNamedArg*/ true);
 
   // Check some invariants.
   assert((Hi != Memory || Lo == Memory) && "Invalid memory classification.");
@@ -2201,11 +2211,12 @@
 }
 
 ABIArgInfo X86_64ABIInfo::classifyArgumentType(
-  QualType Ty, unsigned freeIntRegs, unsigned &neededInt, unsigned &neededSSE)
+  QualType Ty, unsigned freeIntRegs, unsigned &neededInt, unsigned &neededSSE,
+  bool isNamedArg)
   const
 {
   X86_64ABIInfo::Class Lo, Hi;
-  classify(Ty, 0, Lo, Hi);
+  classify(Ty, 0, Lo, Hi, isNamedArg);
 
   // Check some invariants.
   // FIXME: Enforce these by construction.
@@ -2338,13 +2349,22 @@ void X86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const {
   if (FI.getReturnInfo().isIndirect())
     --freeIntRegs;
 
+  bool isVariadic = FI.isVariadic();
+  unsigned numRequiredArgs = 0;
+  if (isVariadic)
+    numRequiredArgs = FI.getRequiredArgs().getNumRequiredArgs();
+
   // AMD64-ABI 3.2.3p3: Once arguments are classified, the registers
   // get assigned (in left-to-right order) for passing as follows...
   for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end();
        it != ie; ++it) {
+    bool isNamedArg = true;
+    if (isVariadic)
+      isNamedArg = (it - FI.arg_begin()) < numRequiredArgs;
+
     unsigned neededInt, neededSSE;
     it->info = classifyArgumentType(it->type, freeIntRegs, neededInt,
-                                    neededSSE);
+                                    neededSSE, isNamedArg);
 
     // AMD64-ABI 3.2.3p3: If there are no registers available for any
     // eightbyte of an argument, the whole argument is passed on the
@@ -2420,7 +2440,8 @@ llvm::Value *X86_64ABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
   unsigned neededInt, neededSSE;
 
   Ty = CGF.getContext().getCanonicalType(Ty);
-  ABIArgInfo AI = classifyArgumentType(Ty, 0, neededInt, neededSSE);
+  ABIArgInfo AI = classifyArgumentType(Ty, 0, neededInt, neededSSE,
+                                       /*isNamedArg*/false);
 
   // AMD64-ABI 3.5.7p5: Step 1. Determine whether type may be passed
   // in the registers. If not go to step 7.
diff --git a/test/CodeGen/x86_64-arguments.c b/test/CodeGen/x86_64-arguments.c
index 7bd534407c..10e6d5bd65 100644
--- a/test/CodeGen/x86_64-arguments.c
+++ b/test/CodeGen/x86_64-arguments.c
@@ -411,3 +411,26 @@ void test51(struct test51_s *s, __builtin_va_list argList) {
 // CHECK-NEXT: add i32 {{.*}}, 16
 // CHECK-NEXT: store i32 {{.*}}, i32* {{.*}}
 // CHECK-NEXT: br label
+
+void test52_helper(int, ...);
+__m256 x52;
+void test52() {
+  test52_helper(0, x52, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i);
+}
+// AVX: @test52_helper(i32 0, <8 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double {{%[a-zA-Z0-9]+}}, double {{%[a-zA-Z0-9]+}})
+
+void test53(__m256 *m, __builtin_va_list argList) {
+  *m = __builtin_va_arg(argList, __m256);
+}
+// AVX: define void @test53
+// AVX-NOT: br i1
+// AVX: ret void
+
+void test54_helper(__m256, ...);
+__m256 x54;
+void test54() {
+  test54_helper(x54, x54, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i);
+  test54_helper(x54, x54, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i);
+}
+// AVX: @test54_helper(<8 x float> {{%[a-zA-Z0-9]+}}, <8 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double {{%[a-zA-Z0-9]+}}, double {{%[a-zA-Z0-9]+}})
+// AVX: @test54_helper(<8 x float> {{%[a-zA-Z0-9]+}}, <8 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, { double, double }* byval align 8 {{%[a-zA-Z0-9]+}})
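
For illustration, here is a minimal standalone program, not part of the
commit (the helper name sum_first_vec is made up), that exercises the case
this change fixes. Per AMD64-ABI 3.5.7, a "named" 256-bit vector argument
may be passed in a YMM register, but one matching the "..." of a variadic
call must be passed in memory; before this fix, Clang classified both the
same way, so the caller could leave the vector in a register while va_arg
searched the overflow argument area.

#include <immintrin.h>
#include <stdarg.h>
#include <stdio.h>

/* Sum the lanes of the first (unnamed) __m256 argument.  With this fix,
   va_arg loads the vector from the overflow argument area, which is where
   the caller now stores unnamed 256-bit vectors. */
static float sum_first_vec(int count, ...) {
  va_list ap;
  va_start(ap, count);
  __m256 v = va_arg(ap, __m256);  /* unnamed 256-bit arg: memory class */
  va_end(ap);

  float lanes[8];
  _mm256_storeu_ps(lanes, v);
  float sum = 0.0f;
  for (int i = 0; i < 8; ++i)
    sum += lanes[i];
  return sum;
}

int main(void) {
  __m256 v = _mm256_set1_ps(1.0f);
  /* Prints 8.000000 once caller and callee agree on where the
     vector lives. */
  printf("%f\n", sum_first_vec(1, v));
  return 0;
}

Compile with AVX enabled (e.g. clang -mavx).  Without this change, the
caller could pass v in a YMM register while va_arg looked for it on the
stack, yielding garbage.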