This commit fixes a bug in IRGen where it generates completely broken
code for __fp16 vectors on X86. For example when the following code is
compiled:
half4 hv0, hv1, hv2; // these are vectors of __fp16.
void foo221() {
hv0 = hv1 + hv2;
}
clang generates the following IR, in which two i16 vectors are added:
@hv1 = common global <4 x i16> zeroinitializer, align 8
@hv2 = common global <4 x i16> zeroinitializer, align 8
@hv0 = common global <4 x i16> zeroinitializer, align 8
define void @foo221() {
%0 = load <4 x i16>, <4 x i16>* @hv1, align 8
%1 = load <4 x i16>, <4 x i16>* @hv2, align 8
%add = add <4 x i16> %0, %1
store <4 x i16> %add, <4 x i16>* @hv0, align 8
ret void
}
To fix the bug, this commit uses the code committed in r314056, which
modified clang to promote and truncate __fp16 vectors to and from float
vectors in the AST. It also fixes another IRGen bug where a short value
is assigned to an __fp16 variable without any integer-to-floating-point
conversion, as shown in the following example (the C source, followed by
the incorrect IR that clang generated for it):
__fp16 a;
short b;
void foo1() {
a = b;
}
@b = common global i16 0, align 2
@a = common global i16 0, align 2
define void @foo1() #0 {
%0 = load i16, i16* @b, align 2
store i16 %0, i16* @a, align 2
ret void
}
rdar://problem/20625184
Differential Revision: https://reviews.llvm.org/D40112
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@320215 91177308-0d34-0410-b5e6-96231b3b80d8
return ComplexLongDoubleUsesFP2Ret;
}
+ /// Check whether llvm intrinsics such as llvm.convert.to.fp16 should be used
+ /// to convert to and from __fp16.
+ /// FIXME: This function should be removed once all targets stop using the
+ /// conversion intrinsics.
+ virtual bool useFP16ConversionIntrinsics() const {
+ return true;
+ }
+
/// \brief Specify if mangling based on address space map should be used or
/// not for language specific address spaces
bool useAddressSpaceMapMangling() const {
bool isValidCPUName(StringRef Name) const override;
bool setCPU(const std::string &Name) override;
+ bool useFP16ConversionIntrinsics() const override {
+ return false;
+ }
+
void getTargetDefinesARMV81A(const LangOptions &Opts,
MacroBuilder &Builder) const;
void getTargetDefinesARMV82A(const LangOptions &Opts,
bool setFPMath(StringRef Name) override;
+ bool useFP16ConversionIntrinsics() const override {
+ return false;
+ }
+
void getTargetDefinesARMV81A(const LangOptions &Opts,
MacroBuilder &Builder) const;
return "";
}
+ bool useFP16ConversionIntrinsics() const override {
+ return false;
+ }
+
void getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const override;
const llvm::APFloat &Init = Value.getFloat();
if (&Init.getSemantics() == &llvm::APFloat::IEEEhalf() &&
!CGM.getContext().getLangOpts().NativeHalfType &&
- !CGM.getContext().getLangOpts().HalfArgsAndReturns)
+ CGM.getContext().getTargetInfo().useFP16ConversionIntrinsics())
return llvm::ConstantInt::get(CGM.getLLVMContext(),
Init.bitcastToAPInt());
else
if (SrcType->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType) {
// Cast to FP using the intrinsic if the half type itself isn't supported.
if (DstTy->isFloatingPointTy()) {
- if (!CGF.getContext().getLangOpts().HalfArgsAndReturns)
+ if (CGF.getContext().getTargetInfo().useFP16ConversionIntrinsics())
return Builder.CreateCall(
CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_from_fp16, DstTy),
Src);
// Cast to other types through float, using either the intrinsic or FPExt,
// depending on whether the half type itself is supported
// (as opposed to operations on half, available with NativeHalfType).
- if (!CGF.getContext().getLangOpts().HalfArgsAndReturns) {
+ if (CGF.getContext().getTargetInfo().useFP16ConversionIntrinsics()) {
Src = Builder.CreateCall(
CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_from_fp16,
CGF.CGM.FloatTy),
if (SrcTy->isFloatingPointTy()) {
// Use the intrinsic if the half type itself isn't supported
// (as opposed to operations on half, available with NativeHalfType).
- if (!CGF.getContext().getLangOpts().HalfArgsAndReturns)
+ if (CGF.getContext().getTargetInfo().useFP16ConversionIntrinsics())
return Builder.CreateCall(
CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_to_fp16, SrcTy), Src);
// If the half type is supported, just use an fptrunc.
}
if (DstTy != ResTy) {
- if (!CGF.getContext().getLangOpts().HalfArgsAndReturns) {
+ if (CGF.getContext().getTargetInfo().useFP16ConversionIntrinsics()) {
assert(ResTy->isIntegerTy(16) && "Only half FP requires extra conversion");
Res = Builder.CreateCall(
CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_to_fp16, CGF.CGM.FloatTy),
if (type->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType) {
// Another special case: half FP increment should be done via float
- if (!CGF.getContext().getLangOpts().HalfArgsAndReturns) {
+ if (CGF.getContext().getTargetInfo().useFP16ConversionIntrinsics()) {
value = Builder.CreateCall(
CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_from_fp16,
CGF.CGM.FloatTy),
value = Builder.CreateFAdd(value, amt, isInc ? "inc" : "dec");
if (type->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType) {
- if (!CGF.getContext().getLangOpts().HalfArgsAndReturns) {
+ if (CGF.getContext().getTargetInfo().useFP16ConversionIntrinsics()) {
value = Builder.CreateCall(
CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_to_fp16,
CGF.CGM.FloatTy),
case BuiltinType::Half:
// Half FP can either be storage-only (lowered to i16) or native.
- ResultType =
- getTypeForFormat(getLLVMContext(), Context.getFloatTypeSemantics(T),
- Context.getLangOpts().NativeHalfType ||
- Context.getLangOpts().HalfArgsAndReturns);
+ ResultType = getTypeForFormat(
+ getLLVMContext(), Context.getFloatTypeSemantics(T),
+ Context.getLangOpts().NativeHalfType ||
+ !Context.getTargetInfo().useFP16ConversionIntrinsics());
break;
case BuiltinType::Float:
case BuiltinType::Double:
static bool needsConversionOfHalfVec(bool OpRequiresConversion, ASTContext &Ctx,
QualType SrcType) {
return OpRequiresConversion && !Ctx.getLangOpts().NativeHalfType &&
- Ctx.getLangOpts().HalfArgsAndReturns && isVector(SrcType, Ctx.HalfTy);
+ !Ctx.getTargetInfo().useFP16ConversionIntrinsics() &&
+ isVector(SrcType, Ctx.HalfTy);
}
/// CreateBuiltinBinOp - Creates a new built-in binary operation with
// REQUIRES: arm-registered-target
-// RUN: %clang_cc1 -emit-llvm -o - -triple arm-none-linux-gnueabi %s | FileCheck %s --check-prefix=NOHALF --check-prefix=CHECK
-// RUN: %clang_cc1 -emit-llvm -o - -triple aarch64-none-linux-gnueabi %s | FileCheck %s --check-prefix=NOHALF --check-prefix=CHECK
-// RUN: %clang_cc1 -emit-llvm -o - -triple arm-none-linux-gnueabi -fallow-half-arguments-and-returns %s | FileCheck %s --check-prefix=HALF --check-prefix=CHECK
-// RUN: %clang_cc1 -emit-llvm -o - -triple aarch64-none-linux-gnueabi -fallow-half-arguments-and-returns %s | FileCheck %s --check-prefix=HALF --check-prefix=CHECK
+// RUN: %clang_cc1 -emit-llvm -o - -triple arm-none-linux-gnueabi %s | FileCheck %s --check-prefix=NOTNATIVE --check-prefix=CHECK
+// RUN: %clang_cc1 -emit-llvm -o - -triple aarch64-none-linux-gnueabi %s | FileCheck %s --check-prefix=NOTNATIVE --check-prefix=CHECK
+// RUN: %clang_cc1 -emit-llvm -o - -triple x86_64-linux-gnu %s | FileCheck %s --check-prefix=NOTNATIVE --check-prefix=CHECK
+// RUN: %clang_cc1 -emit-llvm -o - -triple arm-none-linux-gnueabi -fallow-half-arguments-and-returns %s | FileCheck %s --check-prefix=NOTNATIVE --check-prefix=CHECK
+// RUN: %clang_cc1 -emit-llvm -o - -triple aarch64-none-linux-gnueabi -fallow-half-arguments-and-returns %s | FileCheck %s --check-prefix=NOTNATIVE --check-prefix=CHECK
// RUN: %clang_cc1 -emit-llvm -o - -triple arm-none-linux-gnueabi -fnative-half-type %s \
// RUN: | FileCheck %s --check-prefix=NATIVE-HALF
// RUN: %clang_cc1 -emit-llvm -o - -triple aarch64-none-linux-gnueabi -fnative-half-type %s \
volatile __fp16 h0 = 0.0, h1 = 1.0, h2;
volatile float f0, f1, f2;
volatile double d0;
+short s0;
void foo(void) {
// CHECK-LABEL: define void @foo()
// Check unary ops
- // NOHALF: [[F16TOF32:call float @llvm.convert.from.fp16.f32]]
- // HALF: [[F16TOF32:fpext half]]
+ // NOTNATIVE: [[F16TOF32:fpext half]]
// CHECK: fptoui float
// NATIVE-HALF: fptoui half
test = (h0);
// CHECK: uitofp i32
- // NOHALF: [[F32TOF16:call i16 @llvm.convert.to.fp16.f32]]
- // HALF: [[F32TOF16:fptrunc float]]
+ // NOTNATIVE: [[F32TOF16:fptrunc float]]
// NATIVE-HALF: uitofp i32 {{.*}} to half
h0 = (test);
// CHECK: [[F16TOF32]]
test = (!h1);
// CHECK: [[F16TOF32]]
// CHECK: fsub float
- // NOHALF: [[F32TOF16]]
- // HALF: [[F32TOF16]]
+ // NOTNATIVE: [[F32TOF16]]
// NATIVE-HALF: fsub half
h1 = -h1;
// CHECK: [[F16TOF32]]
// NATIVE-HALF: fmul half
h1 = h0 * h2;
// CHECK: [[F16TOF32]]
- // NOHALF: [[F32TOF16]]
- // NOHALF: [[F16TOF32]]
// CHECK: fmul float
// CHECK: [[F32TOF16]]
// NATIVE-HALF: fmul half
// NATIVE-HALF: fdiv half
h1 = (h0 / h2);
// CHECK: [[F16TOF32]]
- // NOHALF: [[F16TOF32]]
// CHECK: fdiv float
// CHECK: [[F32TOF16]]
// NATIVE-HALF: fdiv half
// NATIVE-HALF: fadd half
h1 = (h2 + h0);
// CHECK: [[F16TOF32]]
- // NOHALF: [[F16TOF32]]
// CHECK: fadd float
// CHECK: [[F32TOF16]]
// NATIVE-HALF: fadd half
// NATIVE-HALF: fsub half
h1 = (h2 - h0);
// CHECK: [[F16TOF32]]
- // NOHALF: [[F16TOF32]]
// CHECK: fsub float
// CHECK: [[F32TOF16]]
// NATIVE-HALF: fsub half
// NATIVE-HALF: fcmp olt half
test = (h2 < h0);
// CHECK: [[F16TOF32]]
- // NOHALF: [[F16TOF32]]
// CHECK: fcmp olt float
// NATIVE-HALF: fcmp olt half
test = (h2 < (__fp16)42.0);
// NATIVE-HALF: fcmp ogt half
test = (h0 > h2);
// CHECK: [[F16TOF32]]
- // NOHALF: [[F16TOF32]]
// CHECK: fcmp ogt float
// NATIVE-HALF: fcmp ogt half
test = ((__fp16)42.0 > h2);
// NATIVE-HALF: fcmp ole half
test = (h2 <= h0);
// CHECK: [[F16TOF32]]
- // NOHALF: [[F16TOF32]]
// CHECK: fcmp ole float
// NATIVE-HALF: fcmp ole half
test = (h2 <= (__fp16)42.0);
// NATIVE-HALF: fcmp oge half
test = (h0 >= h2);
// CHECK: [[F16TOF32]]
- // NOHALF: [[F16TOF32]]
// CHECK: fcmp oge float
// NATIVE-HALF: fcmp oge half
test = (h0 >= (__fp16)-2.0);
// NATIVE-HALF: fcmp oeq half
test = (h1 == h2);
// CHECK: [[F16TOF32]]
- // NOHALF: [[F16TOF32]]
// CHECK: fcmp oeq float
// NATIVE-HALF: fcmp oeq half
test = (h1 == (__fp16)1.0);
// NATIVE-HALF: fcmp une half
test = (h1 != h2);
// CHECK: [[F16TOF32]]
- // NOHALF: [[F16TOF32]]
// CHECK: fcmp une float
// NATIVE-HALF: fcmp une half
test = (h1 != (__fp16)1.0);
h1 = (h1 ? h2 : h0);
// Check assignments (inc. compound)
h0 = h1;
- // NOHALF: [[F32TOF16]]
- // HALF: store {{.*}} half 0xHC000
+ // NOTNATIVE: store {{.*}} half 0xHC000
// NATIVE-HALF: store {{.*}} half 0xHC000
h0 = (__fp16)-2.0f;
// CHECK: [[F32TOF16]]
// NATIVE-HALF: fadd half
h0 += h1;
// CHECK: [[F16TOF32]]
- // NOHALF: [[F16TOF32]]
// CHECK: fadd float
// CHECK: [[F32TOF16]]
// NATIVE-HALF: fadd half
// NATIVE-HALF: fsub half
h0 -= h1;
// CHECK: [[F16TOF32]]
- // NOHALF: [[F16TOF32]]
// CHECK: fsub float
// CHECK: [[F32TOF16]]
// NATIVE-HALF: fsub half
// NATIVE-HALF: fmul half
h0 *= h1;
// CHECK: [[F16TOF32]]
- // NOHALF: [[F16TOF32]]
// CHECK: fmul float
// CHECK: [[F32TOF16]]
// NATIVE-HALF: fmul half
// NATIVE-HALF: fdiv half
h0 /= h1;
// CHECK: [[F16TOF32]]
- // NOHALF: [[F16TOF32]]
// CHECK: fdiv float
// CHECK: [[F32TOF16]]
// NATIVE-HALF: fdiv half
h0 /= i0;
// Check conversions to/from double
- // NOHALF: call i16 @llvm.convert.to.fp16.f64(
- // HALF: fptrunc double {{.*}} to half
+ // NOTNATIVE: fptrunc double {{.*}} to half
// NATIVE-HALF: fptrunc double {{.*}} to half
h0 = d0;
// CHECK: [[MID:%.*]] = fptrunc double {{%.*}} to float
- // NOHALF: call i16 @llvm.convert.to.fp16.f32(float [[MID]])
- // HALF: fptrunc float [[MID]] to half
+ // NOTNATIVE: fptrunc float [[MID]] to half
// NATIVE-HALF: [[MID:%.*]] = fptrunc double {{%.*}} to float
// NATIVE-HALF: fptrunc float {{.*}} to half
h0 = (float)d0;
- // NOHALF: call double @llvm.convert.from.fp16.f64(
- // HALF: fpext half {{.*}} to double
+ // NOTNATIVE: fpext half {{.*}} to double
// NATIVE-HALF: fpext half {{.*}} to double
d0 = h0;
- // NOHALF: [[MID:%.*]] = call float @llvm.convert.from.fp16.f32(
- // HALF: [[MID:%.*]] = fpext half {{.*}} to float
+ // NOTNATIVE: [[MID:%.*]] = fpext half {{.*}} to float
// CHECK: fpext float [[MID]] to double
// NATIVE-HALF: [[MID:%.*]] = fpext half {{.*}} to float
// NATIVE-HALF: fpext float [[MID]] to double
d0 = (float)h0;
+
+ // NOTNATIVE: [[V1:%.*]] = load i16, i16* @s0
+ // NOTNATIVE: [[CONV:%.*]] = sitofp i16 [[V1]] to float
+ // NOTNATIVE: [[TRUNC:%.*]] = fptrunc float [[CONV]] to half
+ // NOTNATIVE: store volatile half [[TRUNC]], half* @h0
+ h0 = s0;
}
// REQUIRES: arm-registered-target
// RUN: %clang_cc1 -triple arm64-apple-ios9 -emit-llvm -o - -fallow-half-arguments-and-returns %s | FileCheck %s --check-prefix=CHECK
// RUN: %clang_cc1 -triple armv7-apple-ios9 -emit-llvm -o - -fallow-half-arguments-and-returns %s | FileCheck %s --check-prefix=CHECK
+// RUN: %clang_cc1 -triple x86_64-apple-macos10.13 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK
typedef __fp16 half4 __attribute__ ((vector_size (8)));
typedef short short4 __attribute__ ((vector_size (8)));
// CHECK-DAG: @_ZN12_GLOBAL__N_13f1nE = internal global half 0xH0000, align 2
_Float16 f2n = 33.f16;
-// CHECK-AARCH64-DAG: @_ZN12_GLOBAL__N_13f2nE = internal global half 0xH5020, align 2
-// CHECK-X86-DAG: @_ZN12_GLOBAL__N_13f2nE = internal global i16 20512, align 2
+// CHECK-DAG: @_ZN12_GLOBAL__N_13f2nE = internal global half 0xH5020, align 2
_Float16 arr1n[10];
// CHECK-AARCH64-DAG: @_ZN12_GLOBAL__N_15arr1nE = internal global [10 x half] zeroinitializer, align 2
// CHECK-X86-DAG: @_ZN12_GLOBAL__N_15arr1nE = internal global [10 x half] zeroinitializer, align 16
_Float16 arr2n[] = { 1.2, 3.0, 3.e4 };
-// CHECK-AARCH64-DAG: @_ZN12_GLOBAL__N_15arr2nE = internal global [3 x half] [half 0xH3CCD, half 0xH4200, half 0xH7753], align 2
-// CHECK-X86-DAG: @_ZN12_GLOBAL__N_15arr2nE = internal global [3 x i16] [i16 15565, i16 16896, i16 30547], align 2
+// CHECK-DAG: @_ZN12_GLOBAL__N_15arr2nE = internal global [3 x half] [half 0xH3CCD, half 0xH4200, half 0xH7753], align 2
const volatile _Float16 func1n(const _Float16 &arg) {
return arg + f2n + arr1n[4] - arr2n[1];
// CHECK-X86-DAG: @f1f = global half 0xH0000, align 2
_Float16 f2f = 32.4;
-// CHECK-AARCH64-DAG: @f2f = global half 0xH500D, align 2
-// CHECK-X86-DAG: @f2f = global i16 20493, align 2
+// CHECK-DAG: @f2f = global half 0xH500D, align 2
_Float16 arr1f[10];
// CHECK-AARCH64-DAG: @arr1f = global [10 x half] zeroinitializer, align 2
// CHECK-X86-DAG: @arr1f = global [10 x half] zeroinitializer, align 16
_Float16 arr2f[] = { -1.2, -3.0, -3.e4 };
-// CHECK-AARCH64-DAG: @arr2f = global [3 x half] [half 0xHBCCD, half 0xHC200, half 0xHF753], align 2
-// CHECK-X86-DAG: @arr2f = global [3 x i16] [i16 -17203, i16 -15872, i16 -2221], align 2
+// CHECK-DAG: @arr2f = global [3 x half] [half 0xHBCCD, half 0xHC200, half 0xHF753], align 2
_Float16 func1f(_Float16 arg);
// CHECK-DAG: call void @_ZN2C1C2EDF16_(%class.C1* %{{.*}}, half %{{.*}})
S1<_Float16> s1 = { 132.f16 };
-// CHECK-AARCH64-DAG: @_ZZ4mainE2s1 = private unnamed_addr constant %struct.S1 { half 0xH5820 }, align 2
-// CHECK-X86-DAG: @_ZZ4mainE2s1 = private unnamed_addr constant { i16 } { i16 22560 }, align 2
+// CHECK-DAG: @_ZZ4mainE2s1 = private unnamed_addr constant %struct.S1 { half 0xH5820 }, align 2
// CHECK-DAG: [[S1:%[0-9]+]] = bitcast %struct.S1* %{{.*}} to i8*
-// CHECK-AARCH64-DAG: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[S1]], i8* bitcast (%struct.S1* @_ZZ4mainE2s1 to i8*), i64 2, i32 2, i1 false)
-// CHECK-X86-DAG: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %{{.*}}, i8* bitcast ({ i16 }* @_ZZ4mainE2s1 to i8*), i64 2, i32 2, i1 false)
+// CHECK-DAG: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[S1]], i8* bitcast (%struct.S1* @_ZZ4mainE2s1 to i8*), i64 2, i32 2, i1 false)
_Float16 f4l = func1n(f1l) + func1f(f2l) + c1.func1c(f3l) + c1.func2c(f1l) +
func1t(f1l) + s1.mem2 - f1n + f2n;
// CHECK-DAG: store half [[INC]], half* %{{.*}}, align 2
_Float16 arr1l[] = { -1.f16, -0.f16, -11.f16 };
-// CHECK-AARCH64-DAG: @_ZZ4mainE5arr1l = private unnamed_addr constant [3 x half] [half 0xHBC00, half 0xH8000, half 0xHC980], align 2
-// CHECK-X86-DAG: @_ZZ4mainE5arr1l = private unnamed_addr constant [3 x i16] [i16 -17408, i16 -32768, i16 -13952], align 2
+// CHECK-DAG: @_ZZ4mainE5arr1l = private unnamed_addr constant [3 x half] [half 0xHBC00, half 0xH8000, half 0xHC980], align 2
float cvtf = f2n;
//CHECK-DAG: [[H2F:%[a-z0-9]+]] = fpext half {{%[0-9]+}} to float
template <typename T, typename U> struct S { static int i; };
template <> int S<__fp16, __fp16>::i = 3;
-// CHECK-LABEL: define void @_Z1fPDh(i16* %x)
+// CHECK-LABEL: define void @_Z1fPDh(half* %x)
void f (__fp16 *x) { }
-// CHECK-LABEL: define void @_Z1gPDhS_(i16* %x, i16* %y)
+// CHECK-LABEL: define void @_Z1gPDhS_(half* %x, half* %y)
void g (__fp16 *x, __fp16 *y) { }