uint64_t Size = getContext().getTypeSize(Ty);
// AMD64-ABI 3.2.3p2: Rule 1. If the size of an object is larger
- // than four eightbytes, ..., it has class MEMORY.
- if (Size > 256)
+ // than eight eightbytes, ..., it has class MEMORY.
+ if (Size > 512)
return;
// AMD64-ABI 3.2.3p2: Rule 1. If ..., or it contains unaligned
// The only case a 256-bit wide vector could be used is when the array
// contains a single 256-bit element. Since Lo and Hi logic isn't extended
// to work for sizes wider than 128, early check and fallback to memory.
- if (Size > 128 && EltSize != 256)
+ //
+ if (Size > 128 &&
+ (Size != EltSize || Size > getNativeVectorSizeForAVXABI(AVXLevel)))
return;
for (uint64_t i=0, Offset=OffsetBase; i<ArraySize; ++i, Offset += EltSize) {
uint64_t Size = getContext().getTypeSize(Ty);
// AMD64-ABI 3.2.3p2: Rule 1. If the size of an object is larger
- // than four eightbytes, ..., it has class MEMORY.
- if (Size > 256)
+ // than eight eightbytes, ..., it has class MEMORY.
+ if (Size > 512)
return;
// AMD64-ABI 3.2.3p2: Rule 2. If a C++ object has either a non-trivial
// contains a single 256-bit element. Since Lo and Hi logic isn't extended
// to work for sizes wider than 128, early check and fallback to memory.
//
- if (Size > 128 && getContext().getTypeSize(i->getType()) != 256) {
+ if (Size > 128 && (Size != getContext().getTypeSize(i->getType()) ||
+ Size > getNativeVectorSizeForAVXABI(AVXLevel))) {
Lo = Memory;
postMerge(Size, Lo, Hi);
return;
// We couldn't find the preferred IR vector type for 'Ty'.
uint64_t Size = getContext().getTypeSize(Ty);
- assert((Size == 128 || Size == 256) && "Invalid type found!");
+ assert((Size == 128 || Size == 256 || Size == 512) && "Invalid type found!");
// Return a LLVM IR vector type based on the size of 'Ty'.
return llvm::VectorType::get(llvm::Type::getDoubleTy(getVMContext()),
s512 x55;
__m512 x56;
-// Even on AVX512, aggregates of size larger than four eightbytes have class
-// MEMORY (AVX512 draft 0.3 3.2.3p2 Rule 1).
+// On AVX512, aggregates which contain a __m512 type are classified as SSE/SSEUP
+// as per https://github.com/hjl-tools/x86-psABI/commit/30f9c9 3.2.3p2 Rule 1
//
-// CHECK: declare void @f55(%struct.s512* byval align 64)
+// AVX512: declare void @f55(<16 x float>)
+// NO-AVX512: declare void @f55(%struct.s512* byval align 64)
void f55(s512 x);
-// However, __m512 has type SSE/SSEUP on AVX512.
+// __m512 has type SSE/SSEUP on AVX512.
//
// AVX512: declare void @f56(<16 x float>)
// NO-AVX512: declare void @f56(<16 x float>* byval align 64)