Clang's default FP contraction setting is “-ffp-contract=on”, which does not
mean “on” in the conventional sense of the word; rather, it means “according to
the per-statement effective value of the relevant pragma”
(“#pragma STDC FP_CONTRACT”).
Before this patch, Clang had that pragma defaulting to “off”. Since the
“-ffp-contract=on” mode is really an AND of two booleans (the command-line
setting and the pragma's effective value) and the second of them defaulted to
“off”, the whole thing effectively defaulted to “off”. This patch changes the
default value of the pragma to “on”, thus making the default pair of booleans
(on, on) rather than (on, off). This makes FP optimization slightly more
aggressive than before when not using any of “-Ofast”, “-ffast-math”, or
“-ffp-contract=fast”. Even with this patch the compiler still respects
“-ffp-contract=off”.
As per a suggestion by Steve Canon, the added code does _not_ require “-O3” or
higher. This is to do our best to preserve identical floating-point results for
unchanged source code compiled for an unchanged target when only changing from
any optimization level in the set (“-O0”, “-O1”, “-O2”, “-O3”) to any other
optimization level in that set. “-Os” and “-Oz” seem to behave identically,
i.e. they should probably be considered part of the aforementioned set, but I
have not reviewed this rigorously. “-Ofast” is explicitly _not_ a member of
that set.
Patch authored by Abe Skolnik [a.skolnik@samsung.com] and Stephen Canon [scanon@apple.com].
Differential Revision: https://reviews.llvm.org/D24481
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@282259 91177308-0d34-0410-b5e6-96231b3b80d8
if (Arch == llvm::Triple::spir || Arch == llvm::Triple::spir64) {
Res.getDiagnosticOpts().Warnings.push_back("spir-compat");
}
+
+ if ((LangOpts.C11 || LangOpts.C99 || LangOpts.CPlusPlus) &&
+ (CodeGenOptions::FPC_On == Res.getCodeGenOpts().getFPContractMode()) &&
+ !LangOpts.CUDA)
+ LangOpts.DefaultFPContract = 1;
+
return Success;
}
-// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -ffp-contract=off -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
// Test new aarch64 intrinsics and types
--- /dev/null
+// RUN: %clang_cc1 -triple=aarch64-unknown -Os -ffp-contract=fast -S -o - %s | FileCheck -check-prefix=CHECK-FAST -check-prefix=CHECK-ALL %s
+// RUN: %clang_cc1 -triple=aarch64-unknown -Os -ffp-contract=on -S -o - %s | FileCheck -check-prefix=CHECK-ON -check-prefix=CHECK-ALL %s
+// RUN: %clang_cc1 -triple=aarch64-unknown -Os -ffp-contract=off -S -o - %s | FileCheck -check-prefix=CHECK-OFF -check-prefix=CHECK-ALL %s
+// RUN: %clang_cc1 -triple=aarch64-unknown -Os -S -o - %s | FileCheck -check-prefix=CHECK-ON -check-prefix=CHECK-ALL %s
+// REQUIRES: aarch64-registered-target
+
+float test1(float x, float y, float z) {
+ return x*y + z;
+ // CHECK-ALL-LABEL: test1:
+ // CHECK-FAST: fmadd
+ // CHECK-ON: fmadd
+ // CHECK-OFF: fmul
+ // CHECK-OFF-NEXT: fadd
+}
+
+double test2(double x, double y, double z) {
+ z -= x*y;
+ return z;
+ // CHECK-ALL-LABEL: test2:
+ // CHECK-FAST: fmsub
+ // CHECK-ON: fmsub
+ // CHECK-OFF: fmul
+ // CHECK-OFF-NEXT: fsub
+}
+
+float test3(float x, float y, float z) {
+ float tmp = x*y;
+ return tmp + z;
+ // CHECK-ALL-LABEL: test3:
+ // CHECK-FAST: fmadd
+ // CHECK-ON: fmul
+ // CHECK-ON-NEXT: fadd
+ // CHECK-OFF: fmul
+ // CHECK-OFF-NEXT: fadd
+}
+
+double test4(double x, double y, double z) {
+ double tmp = x*y;
+ return tmp - z;
+ // CHECK-ALL-LABEL: test4:
+ // CHECK-FAST: fnmsub
+ // CHECK-ON: fmul
+ // CHECK-ON-NEXT: fsub
+ // CHECK-OFF: fmul
+ // CHECK-OFF-NEXT: fsub
+}
+
+#pragma STDC FP_CONTRACT ON
+
+float test5(float x, float y, float z) {
+ return x*y + z;
+ // CHECK-ALL-LABEL: test5:
+ // CHECK-FAST: fmadd
+ // CHECK-ON: fmadd
+ // CHECK-OFF: fmul
+ // CHECK-OFF-NEXT: fadd
+}
+
+double test6(double x, double y, double z) {
+ z -= x*y;
+ return z;
+ // CHECK-ALL-LABEL: test6:
+ // CHECK-FAST: fmsub
+ // CHECK-ON: fmsub
+ // CHECK-OFF: fmul
+ // CHECK-OFF-NEXT: fsub
+}
+
+float test7(float x, float y, float z) {
+ float tmp = x*y;
+ return tmp + z;
+ // CHECK-ALL-LABEL: test7:
+ // CHECK-FAST: fmadd
+ // CHECK-ON: fmul
+ // CHECK-ON-NEXT: fadd
+ // CHECK-OFF: fmul
+ // CHECK-OFF-NEXT: fadd
+}
+
+double test8(double x, double y, double z) {
+ double tmp = x*y;
+ return tmp - z;
+ // CHECK-ALL-LABEL: test8:
+ // CHECK-FAST: fnmsub
+ // CHECK-ON: fmul
+ // CHECK-ON-NEXT: fsub
+ // CHECK-OFF: fmul
+ // CHECK-OFF-NEXT: fsub
+}
+
+#pragma STDC FP_CONTRACT OFF
+
+float test9(float x, float y, float z) {
+ return x*y + z;
+ // CHECK-ALL-LABEL: test9:
+ // CHECK-FAST: fmadd
+ // CHECK-ON: fmul
+ // CHECK-ON-NEXT: fadd
+ // CHECK-OFF: fmul
+ // CHECK-OFF-NEXT: fadd
+}
+
+double test10(double x, double y, double z) {
+ z -= x*y;
+ return z;
+ // CHECK-ALL-LABEL: test10:
+ // CHECK-FAST: fmsub
+ // CHECK-ON: fmul
+ // CHECK-ON-NEXT: fsub
+ // CHECK-OFF: fmul
+ // CHECK-OFF-NEXT: fsub
+}
+
+float test11(float x, float y, float z) {
+ float tmp = x*y;
+ return tmp + z;
+ // CHECK-ALL-LABEL: test11:
+ // CHECK-FAST: fmadd
+ // CHECK-ON: fmul
+ // CHECK-ON-NEXT: fadd
+ // CHECK-OFF: fmul
+ // CHECK-OFF-NEXT: fadd
+}
+
+double test12(double x, double y, double z) {
+ double tmp = x*y;
+ return tmp - z;
+ // CHECK-ALL-LABEL: test12:
+ // CHECK-FAST: fnmsub
+ // CHECK-ON: fmul
+ // CHECK-ON-NEXT: fsub
+ // CHECK-OFF: fmul
+ // CHECK-OFF-NEXT: fsub
+}
+
+#pragma STDC FP_CONTRACT DEFAULT
+
+float test17(float x, float y, float z) {
+ return x*y + z;
+ // CHECK-ALL-LABEL: test17:
+ // CHECK-FAST: fmadd
+ // CHECK-ON: fmadd
+ // CHECK-OFF: fmul
+ // CHECK-OFF-NEXT: fadd
+}
+
+double test18(double x, double y, double z) {
+ z -= x*y;
+ return z;
+ // CHECK-ALL-LABEL: test18:
+ // CHECK-FAST: fmsub
+ // CHECK-ON: fmsub
+ // CHECK-OFF: fmul
+ // CHECK-OFF-NEXT: fsub
+}
+
+float test19(float x, float y, float z) {
+ float tmp = x*y;
+ return tmp + z;
+ // CHECK-ALL-LABEL: test19:
+ // CHECK-FAST: fmadd
+ // CHECK-ON: fmul
+ // CHECK-ON-NEXT: fadd
+ // CHECK-OFF: fmul
+ // CHECK-OFF-NEXT: fadd
+}
+
+double test20(double x, double y, double z) {
+ double tmp = x*y;
+ return tmp - z;
+ // CHECK-ALL-LABEL: test20:
+ // CHECK-FAST: fnmsub
+ // CHECK-ON: fmul
+ // CHECK-ON-NEXT: fsub
+ // CHECK-OFF: fmul
+ // CHECK-OFF-NEXT: fsub
+}
// RUN: %clang_cc1 -O3 -triple %itanium_abi_triple -emit-llvm -o - %s | FileCheck %s
-// Is FP_CONTRACT honored in a simple case?
-float fp_contract_1(float a, float b, float c) {
-// CHECK: _Z13fp_contract_1fff
+// Is FP_CONTRACT on by default, at least at -O3?
+float fp_contract_8(float a, float b, float c) {
+// CHECK: _Z13fp_contract_8fff
// CHECK: tail call float @llvm.fmuladd
- #pragma STDC FP_CONTRACT ON
return a * b + c;
}
// CHECK: _Z13fp_contract_2fff
// CHECK: %[[M:.+]] = fmul float %a, %b
// CHECK-NEXT: fadd float %[[M]], %c
+ #pragma STDC FP_CONTRACT OFF
{
#pragma STDC FP_CONTRACT ON
}
}
// Does FP_CONTRACT survive template instantiation?
-class Foo {};
-Foo operator+(Foo, Foo);
template <typename T>
T template_muladd(T a, T b, T c) {
return a * b + c;
}
+// Does FP_CONTRACT inside a function override the same in the file scope?
+float fp_contract_1(float a, float b, float c) {
+// CHECK: _Z13fp_contract_1fff
+// CHECK: tail call float @llvm.fmuladd
+ #pragma STDC FP_CONTRACT ON
+ return a * b + c;
+}
+
+
// If the multiply has multiple uses, don't produce fmuladd.
// This used to assert (PR25719):
// https://llvm.org/bugs/show_bug.cgi?id=25719
-float fp_contract_7(float a, float b, float c) {
+float fp_contract_7(float a, float b, float c, float& d_passed_by_ref) {
// CHECK: _Z13fp_contract_7fff
// CHECK: %[[M:.+]] = fmul float %b, 2.000000e+00
-// CHECK-NEXT: fsub float %[[M]], %c
#pragma STDC FP_CONTRACT ON
- return (a = 2 * b) - c;
+ return (d_passed_by_ref = 2 * b) - c;
}
--- /dev/null
+// RUN: %clang_cc1 -triple aarch64 -O0 -S -o - %s | FileCheck %s --check-prefix ALL_BUILDS
+// RUN: %clang_cc1 -triple aarch64 -O1 -S -o - %s | FileCheck %s --check-prefixes ALL_BUILDS,NON_O0
+// RUN: %clang_cc1 -triple aarch64 -O2 -S -o - %s | FileCheck %s --check-prefixes ALL_BUILDS,NON_O0
+// RUN: %clang_cc1 -triple aarch64 -O3 -S -o - %s | FileCheck %s --check-prefixes ALL_BUILDS,NON_O0
+
+// REQUIRES: aarch64-registered-target
+
+// ALL_BUILDS-LABEL: fmadd_double:
+// ALL_BUILDS: fmadd d0, d{{[0-7]}}, d{{[0-7]}}, d{{[0-7]}}
+// NON_O0-NEXT: ret
+double fmadd_double(double a, double b, double c) {
+ return a*b+c;
+}
+
+// ALL_BUILDS-LABEL: fmadd_single:
+// ALL_BUILDS: fmadd s0, s{{[0-7]}}, s{{[0-7]}}, s{{[0-7]}}
+// NON_O0-NEXT: ret
+float fmadd_single(float a, float b, float c) {
+ return a*b+c;
+}
+
// DEPRECATED-OFF-CHECK-NOT: -fdeprecated-macro
// RUN: %clang -### -S -ffp-contract=fast %s 2>&1 | FileCheck -check-prefix=FP-CONTRACT-FAST-CHECK %s
-// RUN: %clang -### -S -ffast-math %s 2>&1 | FileCheck -check-prefix=FP-CONTRACT-FAST-CHECK %s
-// RUN: %clang -### -S -ffp-contract=off %s 2>&1 | FileCheck -check-prefix=FP-CONTRACT-OFF-CHECK %s
+// RUN: %clang -### -S -ffast-math %s 2>&1 | FileCheck -check-prefix=FP-CONTRACT-FAST-CHECK %s
+// RUN: %clang -### -S -ffp-contract=off %s 2>&1 | FileCheck -check-prefix=FP-CONTRACT-OFF-CHECK %s
+// RUN: %clang -### -S -ffp-contract=on %s 2>&1 | FileCheck -check-prefix=FP-CONTRACT-ON-CHECK %s
+
// FP-CONTRACT-FAST-CHECK: -ffp-contract=fast
-// FP-CONTRACT-OFF-CHECK: -ffp-contract=off
+// FP-CONTRACT-OFF-CHECK: -ffp-contract=off
+// FP-CONTRACT-ON-CHECK: -ffp-contract=on
// RUN: %clang -### -S -funroll-loops %s 2>&1 | FileCheck -check-prefix=CHECK-UNROLL-LOOPS %s
// RUN: %clang -### -S -fno-unroll-loops %s 2>&1 | FileCheck -check-prefix=CHECK-NO-UNROLL-LOOPS %s