From: Ahmed Bougacha Date: Tue, 19 Apr 2016 17:54:29 +0000 (+0000) Subject: [CodeGen] Widen non-power-of-2 vector HFA base types. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=c585033ef89cc7abe89b03cc4e94528abe3ee3aa;p=clang [CodeGen] Widen non-power-of-2 vector HFA base types. Currently, for the ppc64--gnu and aarch64 ABIs, we recognize: typedef __attribute__((__ext_vector_type__(3))) float v3f32; typedef __attribute__((__ext_vector_type__(16))) char v16i8; struct HFA { v3f32 a; v16i8 b; }; as an HFA. Since the first type encountered is used as the base type, we pass the HFA as: [2 x <3 x float>] Which leads to incorrect IR (relying on padding values) when the second field is used. Instead, explicitly widen the vector (after size rounding) in isHomogeneousAggregate. Differential Revision: http://reviews.llvm.org/D18998 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@266784 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp index a0cc783187..8ad3290192 100644 --- a/lib/CodeGen/TargetInfo.cpp +++ b/lib/CodeGen/TargetInfo.cpp @@ -4120,8 +4120,19 @@ bool ABIInfo::isHomogeneousAggregate(QualType Ty, const Type *&Base, // agree in both total size and mode (float vs. vector) are // treated as being equivalent here. const Type *TyPtr = Ty.getTypePtr(); - if (!Base) + if (!Base) { Base = TyPtr; + // If it's a non-power-of-2 vector, its size is already a power-of-2, + // so make sure to widen it explicitly. 
+ if (const VectorType *VT = Base->getAs<VectorType>()) { + QualType EltTy = VT->getElementType(); + unsigned NumElements = + getContext().getTypeSize(VT) / getContext().getTypeSize(EltTy); + Base = getContext() + .getVectorType(EltTy, NumElements, VT->getVectorKind()) + .getTypePtr(); + } + } if (Base->isVectorType() != TyPtr->isVectorType() || getContext().getTypeSize(Base) != getContext().getTypeSize(TyPtr)) diff --git a/test/CodeGen/aarch64-arguments-hfa-v3.c b/test/CodeGen/aarch64-arguments-hfa-v3.c new file mode 100644 index 0000000000..59fa5e959e --- /dev/null +++ b/test/CodeGen/aarch64-arguments-hfa-v3.c @@ -0,0 +1,20 @@ +// RUN: %clang_cc1 -triple arm64-apple-ios -target-feature +neon -target-abi darwinpcs -fallow-half-arguments-and-returns -emit-llvm -o - %s | FileCheck %s + +typedef __attribute__((__ext_vector_type__(16))) signed char int8x16_t; +typedef __attribute__((__ext_vector_type__(3))) float float32x3_t; + +// CHECK: %struct.HFAv3 = type { [4 x <3 x float>] } +typedef struct { float32x3_t arr[4]; } HFAv3; + +// CHECK: %struct.MixedHFAv3 = type { [3 x <3 x float>], <16 x i8> } +typedef struct { float32x3_t arr[3]; int8x16_t b; } MixedHFAv3; + +// CHECK: define %struct.HFAv3 @test([4 x <4 x float>] %{{.*}}, [4 x <4 x float>] %{{.*}}, [4 x <4 x float>] %{{.*}}) +HFAv3 test(HFAv3 a0, HFAv3 a1, HFAv3 a2) { + return a2; +} + +// CHECK: define %struct.MixedHFAv3 @test_mixed([4 x <4 x float>] %{{.*}}, [4 x <4 x float>] %{{.*}}, [4 x <4 x float>] %{{.*}}) +MixedHFAv3 test_mixed(MixedHFAv3 a0, MixedHFAv3 a1, MixedHFAv3 a2) { + return a2; +} diff --git a/test/CodeGen/arm64-arguments.c b/test/CodeGen/arm64-arguments.c index 93a1a19895..f90b8e3b93 100644 --- a/test/CodeGen/arm64-arguments.c +++ b/test/CodeGen/arm64-arguments.c @@ -714,3 +714,34 @@ int32x4_t test_toobig_hva(int n, ...) 
{ struct TooBigHVA h = __builtin_va_arg(thelist, struct TooBigHVA); return h.d; } + +typedef __attribute__((__ext_vector_type__(3))) float float32x3_t; +typedef struct { float32x3_t arr[4]; } HFAv3; + +float32x3_t test_hva_v3(int n, ...) { +// CHECK-LABEL: define <3 x float> @test_hva_v3(i32 %n, ...) +// CHECK: [[THELIST:%.*]] = alloca i8* +// CHECK: [[CURLIST:%.*]] = load i8*, i8** [[THELIST]] + + // HVA is not indirect, so occupies its full 16 bytes on the stack. but it + // must be properly aligned. +// CHECK: [[ALIGN0:%.*]] = ptrtoint i8* [[CURLIST]] to i64 +// CHECK: [[ALIGN1:%.*]] = add i64 [[ALIGN0]], 15 +// CHECK: [[ALIGN2:%.*]] = and i64 [[ALIGN1]], -16 +// CHECK: [[ALIGNED_LIST:%.*]] = inttoptr i64 [[ALIGN2]] to i8* + +// CHECK: [[NEXTLIST:%.*]] = getelementptr inbounds i8, i8* [[ALIGNED_LIST]], i64 64 +// CHECK: store i8* [[NEXTLIST]], i8** [[THELIST]] + +// CHECK: bitcast i8* [[ALIGNED_LIST]] to %struct.HFAv3* + __builtin_va_list l; + __builtin_va_start(l, n); + HFAv3 r = __builtin_va_arg(l, HFAv3); + return r.arr[2]; +} + +float32x3_t test_hva_v3_call(HFAv3 *a) { +// CHECK-LABEL: define <3 x float> @test_hva_v3_call(%struct.HFAv3* %a) +// CHECK: call <3 x float> (i32, ...) 
@test_hva_v3(i32 1, [4 x <4 x float>] {{.*}}) + return test_hva_v3(1, *a); +} diff --git a/test/CodeGen/ppc64le-aggregates.c b/test/CodeGen/ppc64le-aggregates.c index 3ad4b06c68..04d2fb4766 100644 --- a/test/CodeGen/ppc64le-aggregates.c +++ b/test/CodeGen/ppc64le-aggregates.c @@ -255,84 +255,84 @@ struct v3f9 { float3 v[9]; }; struct v3fab { float3 a; float3 b; }; struct v3fabc { float3 a; float3 b; float3 c; }; -// CHECK: define [1 x <3 x float>] @func_v3f1(<3 x float> inreg %x.coerce) +// CHECK: define [1 x <4 x float>] @func_v3f1(<3 x float> inreg %x.coerce) struct v3f1 func_v3f1(struct v3f1 x) { return x; } -// CHECK: define [2 x <3 x float>] @func_v3f2([2 x <3 x float>] %x.coerce) +// CHECK: define [2 x <4 x float>] @func_v3f2([2 x <4 x float>] %x.coerce) struct v3f2 func_v3f2(struct v3f2 x) { return x; } -// CHECK: define [3 x <3 x float>] @func_v3f3([3 x <3 x float>] %x.coerce) +// CHECK: define [3 x <4 x float>] @func_v3f3([3 x <4 x float>] %x.coerce) struct v3f3 func_v3f3(struct v3f3 x) { return x; } -// CHECK: define [4 x <3 x float>] @func_v3f4([4 x <3 x float>] %x.coerce) +// CHECK: define [4 x <4 x float>] @func_v3f4([4 x <4 x float>] %x.coerce) struct v3f4 func_v3f4(struct v3f4 x) { return x; } -// CHECK: define [5 x <3 x float>] @func_v3f5([5 x <3 x float>] %x.coerce) +// CHECK: define [5 x <4 x float>] @func_v3f5([5 x <4 x float>] %x.coerce) struct v3f5 func_v3f5(struct v3f5 x) { return x; } -// CHECK: define [6 x <3 x float>] @func_v3f6([6 x <3 x float>] %x.coerce) +// CHECK: define [6 x <4 x float>] @func_v3f6([6 x <4 x float>] %x.coerce) struct v3f6 func_v3f6(struct v3f6 x) { return x; } -// CHECK: define [7 x <3 x float>] @func_v3f7([7 x <3 x float>] %x.coerce) +// CHECK: define [7 x <4 x float>] @func_v3f7([7 x <4 x float>] %x.coerce) struct v3f7 func_v3f7(struct v3f7 x) { return x; } -// CHECK: define [8 x <3 x float>] @func_v3f8([8 x <3 x float>] %x.coerce) +// CHECK: define [8 x <4 x float>] @func_v3f8([8 x <4 x float>] %x.coerce) struct 
v3f8 func_v3f8(struct v3f8 x) { return x; } // CHECK: define void @func_v3f9(%struct.v3f9* noalias sret %agg.result, %struct.v3f9* byval align 16 %x) struct v3f9 func_v3f9(struct v3f9 x) { return x; } -// CHECK: define [2 x <3 x float>] @func_v3fab([2 x <3 x float>] %x.coerce) +// CHECK: define [2 x <4 x float>] @func_v3fab([2 x <4 x float>] %x.coerce) struct v3fab func_v3fab(struct v3fab x) { return x; } -// CHECK: define [3 x <3 x float>] @func_v3fabc([3 x <3 x float>] %x.coerce) +// CHECK: define [3 x <4 x float>] @func_v3fabc([3 x <4 x float>] %x.coerce) struct v3fabc func_v3fabc(struct v3fabc x) { return x; } // CHECK-LABEL: @call_v3f1 // CHECK: %[[TMP:[^ ]+]] = load <3 x float>, <3 x float>* getelementptr inbounds (%struct.v3f1, %struct.v3f1* @global_v3f1, i32 0, i32 0, i32 0), align 1 -// CHECK: call [1 x <3 x float>] @func_v3f1(<3 x float> inreg %[[TMP]]) +// CHECK: call [1 x <4 x float>] @func_v3f1(<3 x float> inreg %[[TMP]]) struct v3f1 global_v3f1; void call_v3f1(void) { global_v3f1 = func_v3f1(global_v3f1); } // CHECK-LABEL: @call_v3f2 -// CHECK: %[[TMP:[^ ]+]] = load [2 x <3 x float>], [2 x <3 x float>]* getelementptr inbounds (%struct.v3f2, %struct.v3f2* @global_v3f2, i32 0, i32 0), align 1 -// CHECK: call [2 x <3 x float>] @func_v3f2([2 x <3 x float>] %[[TMP]]) +// CHECK: %[[TMP:[^ ]+]] = load [2 x <4 x float>], [2 x <4 x float>]* bitcast (%struct.v3f2* @global_v3f2 to [2 x <4 x float>]*), align 16 +// CHECK: call [2 x <4 x float>] @func_v3f2([2 x <4 x float>] %[[TMP]]) struct v3f2 global_v3f2; void call_v3f2(void) { global_v3f2 = func_v3f2(global_v3f2); } // CHECK-LABEL: @call_v3f3 -// CHECK: %[[TMP:[^ ]+]] = load [3 x <3 x float>], [3 x <3 x float>]* getelementptr inbounds (%struct.v3f3, %struct.v3f3* @global_v3f3, i32 0, i32 0), align 1 -// CHECK: call [3 x <3 x float>] @func_v3f3([3 x <3 x float>] %[[TMP]]) +// CHECK: %[[TMP:[^ ]+]] = load [3 x <4 x float>], [3 x <4 x float>]* bitcast (%struct.v3f3* @global_v3f3 to [3 x <4 x float>]*), align 16 
+// CHECK: call [3 x <4 x float>] @func_v3f3([3 x <4 x float>] %[[TMP]]) struct v3f3 global_v3f3; void call_v3f3(void) { global_v3f3 = func_v3f3(global_v3f3); } // CHECK-LABEL: @call_v3f4 -// CHECK: %[[TMP:[^ ]+]] = load [4 x <3 x float>], [4 x <3 x float>]* getelementptr inbounds (%struct.v3f4, %struct.v3f4* @global_v3f4, i32 0, i32 0), align 1 -// CHECK: call [4 x <3 x float>] @func_v3f4([4 x <3 x float>] %[[TMP]]) +// CHECK: %[[TMP:[^ ]+]] = load [4 x <4 x float>], [4 x <4 x float>]* bitcast (%struct.v3f4* @global_v3f4 to [4 x <4 x float>]*), align 16 +// CHECK: call [4 x <4 x float>] @func_v3f4([4 x <4 x float>] %[[TMP]]) struct v3f4 global_v3f4; void call_v3f4(void) { global_v3f4 = func_v3f4(global_v3f4); } // CHECK-LABEL: @call_v3f5 -// CHECK: %[[TMP:[^ ]+]] = load [5 x <3 x float>], [5 x <3 x float>]* getelementptr inbounds (%struct.v3f5, %struct.v3f5* @global_v3f5, i32 0, i32 0), align 1 -// CHECK: call [5 x <3 x float>] @func_v3f5([5 x <3 x float>] %[[TMP]]) +// CHECK: %[[TMP:[^ ]+]] = load [5 x <4 x float>], [5 x <4 x float>]* bitcast (%struct.v3f5* @global_v3f5 to [5 x <4 x float>]*), align 16 +// CHECK: call [5 x <4 x float>] @func_v3f5([5 x <4 x float>] %[[TMP]]) struct v3f5 global_v3f5; void call_v3f5(void) { global_v3f5 = func_v3f5(global_v3f5); } // CHECK-LABEL: @call_v3f6 -// CHECK: %[[TMP:[^ ]+]] = load [6 x <3 x float>], [6 x <3 x float>]* getelementptr inbounds (%struct.v3f6, %struct.v3f6* @global_v3f6, i32 0, i32 0), align 1 -// CHECK: call [6 x <3 x float>] @func_v3f6([6 x <3 x float>] %[[TMP]]) +// CHECK: %[[TMP:[^ ]+]] = load [6 x <4 x float>], [6 x <4 x float>]* bitcast (%struct.v3f6* @global_v3f6 to [6 x <4 x float>]*), align 16 +// CHECK: call [6 x <4 x float>] @func_v3f6([6 x <4 x float>] %[[TMP]]) struct v3f6 global_v3f6; void call_v3f6(void) { global_v3f6 = func_v3f6(global_v3f6); } // CHECK-LABEL: @call_v3f7 -// CHECK: %[[TMP:[^ ]+]] = load [7 x <3 x float>], [7 x <3 x float>]* getelementptr inbounds (%struct.v3f7, %struct.v3f7* 
@global_v3f7, i32 0, i32 0), align 1 -// CHECK: call [7 x <3 x float>] @func_v3f7([7 x <3 x float>] %[[TMP]]) +// CHECK: %[[TMP:[^ ]+]] = load [7 x <4 x float>], [7 x <4 x float>]* bitcast (%struct.v3f7* @global_v3f7 to [7 x <4 x float>]*), align 16 +// CHECK: call [7 x <4 x float>] @func_v3f7([7 x <4 x float>] %[[TMP]]) struct v3f7 global_v3f7; void call_v3f7(void) { global_v3f7 = func_v3f7(global_v3f7); } // CHECK-LABEL: @call_v3f8 -// CHECK: %[[TMP:[^ ]+]] = load [8 x <3 x float>], [8 x <3 x float>]* getelementptr inbounds (%struct.v3f8, %struct.v3f8* @global_v3f8, i32 0, i32 0), align 1 -// CHECK: call [8 x <3 x float>] @func_v3f8([8 x <3 x float>] %[[TMP]]) +// CHECK: %[[TMP:[^ ]+]] = load [8 x <4 x float>], [8 x <4 x float>]* bitcast (%struct.v3f8* @global_v3f8 to [8 x <4 x float>]*), align 16 +// CHECK: call [8 x <4 x float>] @func_v3f8([8 x <4 x float>] %[[TMP]]) struct v3f8 global_v3f8; void call_v3f8(void) { global_v3f8 = func_v3f8(global_v3f8); } @@ -342,14 +342,14 @@ struct v3f9 global_v3f9; void call_v3f9(void) { global_v3f9 = func_v3f9(global_v3f9); } // CHECK-LABEL: @call_v3fab -// CHECK: %[[TMP:[^ ]+]] = load [2 x <3 x float>], [2 x <3 x float>]* bitcast (%struct.v3fab* @global_v3fab to [2 x <3 x float>]*) -// CHECK: call [2 x <3 x float>] @func_v3fab([2 x <3 x float>] %[[TMP]]) +// CHECK: %[[TMP:[^ ]+]] = load [2 x <4 x float>], [2 x <4 x float>]* bitcast (%struct.v3fab* @global_v3fab to [2 x <4 x float>]*), align 16 +// CHECK: call [2 x <4 x float>] @func_v3fab([2 x <4 x float>] %[[TMP]]) struct v3fab global_v3fab; void call_v3fab(void) { global_v3fab = func_v3fab(global_v3fab); } // CHECK-LABEL: @call_v3fabc -// CHECK: %[[TMP:[^ ]+]] = load [3 x <3 x float>], [3 x <3 x float>]* bitcast (%struct.v3fabc* @global_v3fabc to [3 x <3 x float>]*) -// CHECK: call [3 x <3 x float>] @func_v3fabc([3 x <3 x float>] %[[TMP]]) +// CHECK: %[[TMP:[^ ]+]] = load [3 x <4 x float>], [3 x <4 x float>]* bitcast (%struct.v3fabc* @global_v3fabc to [3 x <4 x float>]*), 
align 16 +// CHECK: call [3 x <4 x float>] @func_v3fabc([3 x <4 x float>] %[[TMP]]) struct v3fabc global_v3fabc; void call_v3fabc(void) { global_v3fabc = func_v3fabc(global_v3fabc); }