From: David Majnemer Date: Mon, 15 Aug 2016 06:39:18 +0000 (+0000) Subject: [CodeGen] Correctly implement the AVX512 psABI rules X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=f9187a1ffdf709ce72b750295d6b4177c53255e6;p=clang [CodeGen] Correctly implement the AVX512 psABI rules An __m512 vector type wrapped in a structure should be passed in a vector register. Our prior implementation was based on a draft version of the psABI. This fixes PR28975. N.B. The update to the ABI was made here: https://github.com/hjl-tools/x86-psABI/commit/30f9c9 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@278655 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp index fa1b58ddd5..7b22ddc431 100644 --- a/lib/CodeGen/TargetInfo.cpp +++ b/lib/CodeGen/TargetInfo.cpp @@ -2492,8 +2492,8 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase, uint64_t Size = getContext().getTypeSize(Ty); // AMD64-ABI 3.2.3p2: Rule 1. If the size of an object is larger - // than four eightbytes, ..., it has class MEMORY. - if (Size > 256) + // than eight eightbytes, ..., it has class MEMORY. + if (Size > 512) return; // AMD64-ABI 3.2.3p2: Rule 1. If ..., or it contains unaligned @@ -2512,7 +2512,9 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase, // The only case a 256-bit wide vector could be used is when the array // contains a single 256-bit element. Since Lo and Hi logic isn't extended // to work for sizes wider than 128, early check and fallback to memory. - if (Size > 128 && EltSize != 256) + // + if (Size > 128 && + (Size != EltSize || Size > getNativeVectorSizeForAVXABI(AVXLevel))) return; for (uint64_t i=0, Offset=OffsetBase; i 256) + // than eight eightbytes, ..., it has class MEMORY. + if (Size > 512) return; // AMD64-ABI 3.2.3p2: Rule 2. If a C++ object has either a non-trivial @@ -2594,7 +2596,8 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase, // contains a single 256-bit element. Since Lo and Hi logic isn't extended // to work for sizes wider than 128, early check and fallback to memory. // - if (Size > 128 && getContext().getTypeSize(i->getType()) != 256) { + if (Size > 128 && (Size != getContext().getTypeSize(i->getType()) || + Size > getNativeVectorSizeForAVXABI(AVXLevel))) { Lo = Memory; postMerge(Size, Lo, Hi); return; @@ -2749,7 +2752,7 @@ llvm::Type *X86_64ABIInfo::GetByteVectorType(QualType Ty) const { // We couldn't find the preferred IR vector type for 'Ty'. uint64_t Size = getContext().getTypeSize(Ty); - assert((Size == 128 || Size == 256) && "Invalid type found!"); + assert((Size == 128 || Size == 256 || Size == 512) && "Invalid type found!"); // Return a LLVM IR vector type based on the size of 'Ty'. return llvm::VectorType::get(llvm::Type::getDoubleTy(getVMContext()), diff --git a/test/CodeGen/x86_64-arguments.c b/test/CodeGen/x86_64-arguments.c index a2d60cc6b1..de7cfd9d57 100644 --- a/test/CodeGen/x86_64-arguments.c +++ b/test/CodeGen/x86_64-arguments.c @@ -470,13 +470,14 @@ typedef struct { s512 x55; __m512 x56; -// Even on AVX512, aggregates of size larger than four eightbytes have class -// MEMORY (AVX512 draft 0.3 3.2.3p2 Rule 1). +// On AVX512, aggregates which contain a __m512 type are classified as SSE/SSEUP +// as per https://github.com/hjl-tools/x86-psABI/commit/30f9c9 3.2.3p2 Rule 1 // -// CHECK: declare void @f55(%struct.s512* byval align 64) +// AVX512: declare void @f55(<16 x float>) +// NO-AVX512: declare void @f55(%struct.s512* byval align 64) void f55(s512 x); -// However, __m512 has type SSE/SSEUP on AVX512. +// __m512 has type SSE/SSEUP on AVX512. // // AVX512: declare void @f56(<16 x float>) // NO-AVX512: declare void @f56(<16 x float>* byval align 64)