From 2ae950726990c09f790931edcf9e6763851ee077 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 3 Jun 2012 21:46:30 +0000 Subject: [PATCH] Add fma feature flag for Intel FMA instructions. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@157904 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Basic/Targets.cpp | 23 ++++++++- test/Preprocessor/predefined-arch-macros.c | 56 ++++++++++++++++++++++ 2 files changed, 77 insertions(+), 2 deletions(-) diff --git a/lib/Basic/Targets.cpp b/lib/Basic/Targets.cpp index b5bfddac52..b52baba22b 100644 --- a/lib/Basic/Targets.cpp +++ b/lib/Basic/Targets.cpp @@ -1245,6 +1245,7 @@ class X86TargetInfo : public TargetInfo { bool HasPOPCNT; bool HasSSE4a; bool HasFMA4; + bool HasFMA; /// \brief Enumeration of all of the X86 CPUs supported by Clang. /// @@ -1391,7 +1392,7 @@ public: : TargetInfo(triple), SSELevel(NoSSE), MMX3DNowLevel(NoMMX3DNow), HasAES(false), HasPCLMUL(false), HasLZCNT(false), HasBMI(false), HasBMI2(false), HasPOPCNT(false), HasSSE4a(false), HasFMA4(false), - CPU(CK_Generic) { + HasFMA(false), CPU(CK_Generic) { BigEndian = false; LongDoubleFormat = &llvm::APFloat::x87DoubleExtended; } @@ -1581,6 +1582,7 @@ void X86TargetInfo::getDefaultFeatures(llvm::StringMap &Features) const { Features["bmi2"] = false; Features["popcnt"] = false; Features["fma4"] = false; + Features["fma"] = false; // FIXME: This *really* should not be here. @@ -1650,6 +1652,7 @@ void X86TargetInfo::getDefaultFeatures(llvm::StringMap &Features) const { setFeatureEnabled(Features, "lzcnt", true); setFeatureEnabled(Features, "bmi", true); setFeatureEnabled(Features, "bmi2", true); + setFeatureEnabled(Features, "fma", true); break; case CK_K6: case CK_WinChipC6: @@ -1755,6 +1758,10 @@ bool X86TargetInfo::setFeatureEnabled(llvm::StringMap &Features, Features["mmx"] = Features["sse"] = Features["sse2"] = Features["sse3"] = Features["ssse3"] = Features["sse41"] = Features["sse42"] = Features["popcnt"] = Features["avx"] = Features["avx2"] = true; + else if (Name == "fma") + Features["mmx"] = Features["sse"] = Features["sse2"] = Features["sse3"] = + Features["ssse3"] = Features["sse41"] = Features["sse42"] = + Features["popcnt"] = Features["avx"] = Features["fma"] = true; else if (Name == "fma4") Features["mmx"] = Features["sse"] = Features["sse2"] = Features["sse3"] = Features["ssse3"] = Features["sse41"] = Features["sse42"] = @@ -1799,9 +1806,12 @@ bool X86TargetInfo::setFeatureEnabled(llvm::StringMap &Features, else if (Name == "pclmul") Features["pclmul"] = false; else if (Name == "avx") - Features["avx"] = Features["avx2"] = Features["fma4"] = false; + Features["avx"] = Features["avx2"] = Features["fma"] = + Features["fma4"] = false; else if (Name == "avx2") Features["avx2"] = false; + else if (Name == "fma") + Features["fma"] = false; else if (Name == "sse4a") Features["sse4a"] = Features["fma4"] = false; else if (Name == "lzcnt") @@ -1870,6 +1880,11 @@ void X86TargetInfo::HandleTargetFeatures(std::vector &Features) { continue; } + if (Feature == "fma") { + HasFMA = true; + continue; + } + assert(Features[i][0] == '+' && "Invalid target feature!"); X86SSEEnum Level = llvm::StringSwitch(Feature) .Case("avx2", AVX2) @@ -2073,6 +2088,9 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts, if (HasFMA4) Builder.defineMacro("__FMA4__"); + if (HasFMA) + Builder.defineMacro("__FMA__"); + // Each case falls through to the previous one here. switch (SSELevel) { case AVX2: @@ -2136,6 +2154,7 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const { .Case("avx2", SSELevel >= AVX2) .Case("bmi", HasBMI) .Case("bmi2", HasBMI2) + .Case("fma", HasFMA) .Case("fma4", HasFMA4) .Case("lzcnt", HasLZCNT) .Case("mm3dnow", MMX3DNowLevel >= AMD3DNow) diff --git a/test/Preprocessor/predefined-arch-macros.c b/test/Preprocessor/predefined-arch-macros.c index f4def04378..df53942b93 100644 --- a/test/Preprocessor/predefined-arch-macros.c +++ b/test/Preprocessor/predefined-arch-macros.c @@ -417,6 +417,7 @@ // CHECK_COREI7_AVX_M32: #define __AES__ 1 // CHECK_COREI7_AVX_M32: #define __AVX__ 1 // CHECK_COREI7_AVX_M32: #define __MMX__ 1 +// CHECK_COREI7_AVX_M32: #define __PCLMUL__ 1 // CHECK_COREI7_AVX_M32: #define __SSE2__ 1 // CHECK_COREI7_AVX_M32: #define __SSE3__ 1 // CHECK_COREI7_AVX_M32: #define __SSE4_1__ 1 @@ -435,6 +436,7 @@ // CHECK_COREI7_AVX_M64: #define __AES__ 1 // CHECK_COREI7_AVX_M64: #define __AVX__ 1 // CHECK_COREI7_AVX_M64: #define __MMX__ 1 +// CHECK_COREI7_AVX_M64: #define __PCLMUL__ 1 // CHECK_COREI7_AVX_M64: #define __SSE2_MATH__ 1 // CHECK_COREI7_AVX_M64: #define __SSE2__ 1 // CHECK_COREI7_AVX_M64: #define __SSE3__ 1 @@ -457,6 +459,7 @@ // CHECK_CORE_AVX_I_M32: #define __AES__ 1 // CHECK_CORE_AVX_I_M32: #define __AVX__ 1 // CHECK_CORE_AVX_I_M32: #define __MMX__ 1 +// CHECK_CORE_AVX_I_M32: #define __PCLMUL__ 1 // CHECK_CORE_AVX_I_M32: #define __SSE2__ 1 // CHECK_CORE_AVX_I_M32: #define __SSE3__ 1 // CHECK_CORE_AVX_I_M32: #define __SSE4_1__ 1 @@ -475,6 +478,7 @@ // CHECK_CORE_AVX_I_M64: #define __AES__ 1 // CHECK_CORE_AVX_I_M64: #define __AVX__ 1 // CHECK_CORE_AVX_I_M64: #define __MMX__ 1 +// CHECK_CORE_AVX_I_M64: #define __PCLMUL__ 1 // CHECK_CORE_AVX_I_M64: #define __SSE2_MATH__ 1 // CHECK_CORE_AVX_I_M64: #define __SSE2__ 1 // CHECK_CORE_AVX_I_M64: #define __SSE3__ 1 @@ -491,6 +495,58 @@ // CHECK_CORE_AVX_I_M64: #define __x86_64 1 // CHECK_CORE_AVX_I_M64: #define __x86_64__ 1 // +// RUN: %clang -march=core-avx2 -m32 -E -dM %s -o - 2>&1 \ +// RUN: -target i386-unknown-linux \ +// RUN: | FileCheck %s -check-prefix=CHECK_CORE_AVX2_M32 +// CHECK_CORE_AVX2_M32: #define __AES__ 1 +// CHECK_CORE_AVX2_M32: #define __AVX__ 1 +// CHECK_CORE_AVX2_M32: #define __BMI2__ 1 +// CHECK_CORE_AVX2_M32: #define __BMI__ 1 +// CHECK_CORE_AVX2_M32: #define __FMA__ 1 +// CHECK_CORE_AVX2_M32: #define __LZCNT__ 1 +// CHECK_CORE_AVX2_M32: #define __MMX__ 1 +// CHECK_CORE_AVX2_M32: #define __PCLMUL__ 1 +// CHECK_CORE_AVX2_M32: #define __POPCNT__ 1 +// CHECK_CORE_AVX2_M32: #define __SSE2__ 1 +// CHECK_CORE_AVX2_M32: #define __SSE3__ 1 +// CHECK_CORE_AVX2_M32: #define __SSE4_1__ 1 +// CHECK_CORE_AVX2_M32: #define __SSE4_2__ 1 +// CHECK_CORE_AVX2_M32: #define __SSE__ 1 +// CHECK_CORE_AVX2_M32: #define __SSSE3__ 1 +// CHECK_CORE_AVX2_M32: #define __corei7 1 +// CHECK_CORE_AVX2_M32: #define __corei7__ 1 +// CHECK_CORE_AVX2_M32: #define __i386 1 +// CHECK_CORE_AVX2_M32: #define __i386__ 1 +// CHECK_CORE_AVX2_M32: #define __tune_corei7__ 1 +// CHECK_CORE_AVX2_M32: #define i386 1 +// RUN: %clang -march=core-avx2 -m64 -E -dM %s -o - 2>&1 \ +// RUN: -target i386-unknown-linux \ +// RUN: | FileCheck %s -check-prefix=CHECK_CORE_AVX2_M64 +// CHECK_CORE_AVX2_M64: #define __AES__ 1 +// CHECK_CORE_AVX2_M64: #define __AVX__ 1 +// CHECK_CORE_AVX2_M64: #define __BMI2__ 1 +// CHECK_CORE_AVX2_M64: #define __BMI__ 1 +// CHECK_CORE_AVX2_M64: #define __FMA__ 1 +// CHECK_CORE_AVX2_M64: #define __LZCNT__ 1 +// CHECK_CORE_AVX2_M64: #define __MMX__ 1 +// CHECK_CORE_AVX2_M64: #define __PCLMUL__ 1 +// CHECK_CORE_AVX2_M64: #define __POPCNT__ 1 +// CHECK_CORE_AVX2_M64: #define __SSE2_MATH__ 1 +// CHECK_CORE_AVX2_M64: #define __SSE2__ 1 +// CHECK_CORE_AVX2_M64: #define __SSE3__ 1 +// CHECK_CORE_AVX2_M64: #define __SSE4_1__ 1 +// CHECK_CORE_AVX2_M64: #define __SSE4_2__ 1 +// CHECK_CORE_AVX2_M64: #define __SSE_MATH__ 1 +// CHECK_CORE_AVX2_M64: #define __SSE__ 1 +// CHECK_CORE_AVX2_M64: #define __SSSE3__ 1 +// CHECK_CORE_AVX2_M64: #define __amd64 1 +// CHECK_CORE_AVX2_M64: #define __amd64__ 1 +// CHECK_CORE_AVX2_M64: #define __corei7 1 +// CHECK_CORE_AVX2_M64: #define __corei7__ 1 +// CHECK_CORE_AVX2_M64: #define __tune_corei7__ 1 +// CHECK_CORE_AVX2_M64: #define __x86_64 1 +// CHECK_CORE_AVX2_M64: #define __x86_64__ 1 +// // RUN: %clang -march=atom -m32 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ // RUN: | FileCheck %s -check-prefix=CHECK_ATOM_M32 -- 2.40.0