From 15dbacc7cb0e01256345d85efdf1d43eec71b9f9 Mon Sep 17 00:00:00 2001 From: Yunzhong Gao Date: Wed, 16 Oct 2013 19:07:02 +0000 Subject: [PATCH] Enabling 3DNow! prefetch instruction support for a few AMD processors in the clang front end. This change allows the __PRFCHW__ macro to be defined on these processors, which in turn causes the x86intrin.h header to include prfchwintrin.h. Support for the intrinsic itself seems to have already been added in r178041. Differential Revision: http://llvm-reviews.chandlerc.com/D1934 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@192829 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Basic/Targets.cpp | 11 +++++++++++ test/Preprocessor/predefined-arch-macros.c | 8 ++++++++ test/Preprocessor/x86_target_features.c | 21 +++++++++++++++++++++ 3 files changed, 40 insertions(+) diff --git a/lib/Basic/Targets.cpp b/lib/Basic/Targets.cpp index 09e7dad063..701c0a9e70 100644 --- a/lib/Basic/Targets.cpp +++ b/lib/Basic/Targets.cpp @@ -2120,6 +2120,7 @@ void X86TargetInfo::getDefaultFeatures(llvm::StringMap &Features) const { setFeatureEnabledImpl(Features, "cx16", true); setFeatureEnabledImpl(Features, "lzcnt", true); setFeatureEnabledImpl(Features, "popcnt", true); + setFeatureEnabledImpl(Features, "prfchw", true); break; case CK_BTVER2: setFeatureEnabledImpl(Features, "avx", true); @@ -2127,6 +2128,7 @@ void X86TargetInfo::getDefaultFeatures(llvm::StringMap &Features) const { setFeatureEnabledImpl(Features, "lzcnt", true); setFeatureEnabledImpl(Features, "aes", true); setFeatureEnabledImpl(Features, "pclmul", true); + setFeatureEnabledImpl(Features, "prfchw", true); setFeatureEnabledImpl(Features, "bmi", true); setFeatureEnabledImpl(Features, "f16c", true); setFeatureEnabledImpl(Features, "cx16", true); @@ -2136,6 +2138,7 @@ void X86TargetInfo::getDefaultFeatures(llvm::StringMap &Features) const { setFeatureEnabledImpl(Features, "lzcnt", true); setFeatureEnabledImpl(Features, "aes", true); setFeatureEnabledImpl(Features, "pclmul", true); + 
setFeatureEnabledImpl(Features, "prfchw", true); setFeatureEnabledImpl(Features, "cx16", true); break; case CK_BDVER2: @@ -2143,6 +2146,7 @@ void X86TargetInfo::getDefaultFeatures(llvm::StringMap &Features) const { setFeatureEnabledImpl(Features, "lzcnt", true); setFeatureEnabledImpl(Features, "aes", true); setFeatureEnabledImpl(Features, "pclmul", true); + setFeatureEnabledImpl(Features, "prfchw", true); setFeatureEnabledImpl(Features, "bmi", true); setFeatureEnabledImpl(Features, "fma", true); setFeatureEnabledImpl(Features, "f16c", true); @@ -2467,6 +2471,13 @@ bool X86TargetInfo::HandleTargetFeatures(std::vector &Features, Features.push_back("+popcnt"); } + // Enable prfchw if 3DNow! is enabled and prfchw is not explicitly disabled. + if (!HasPRFCHW && MMX3DNowLevel >= AMD3DNow && + std::find(Features.begin(), Features.end(), "-prfchw") == Features.end()){ + HasPRFCHW = true; + Features.push_back("+prfchw"); + } + // LLVM doesn't have a separate switch for fpmath, so only accept it if it // matches the selected sse level. 
if (FPMath == FP_SSE && SSELevel < SSE1) { diff --git a/test/Preprocessor/predefined-arch-macros.c b/test/Preprocessor/predefined-arch-macros.c index bcfe90f29c..45e317a3d8 100644 --- a/test/Preprocessor/predefined-arch-macros.c +++ b/test/Preprocessor/predefined-arch-macros.c @@ -1166,6 +1166,7 @@ // CHECK_BTVER1_M32: #define __LZCNT__ 1 // CHECK_BTVER1_M32: #define __MMX__ 1 // CHECK_BTVER1_M32: #define __POPCNT__ 1 +// CHECK_BTVER1_M32: #define __PRFCHW__ 1 // CHECK_BTVER1_M32: #define __SSE2_MATH__ 1 // CHECK_BTVER1_M32: #define __SSE2__ 1 // CHECK_BTVER1_M32: #define __SSE3__ 1 @@ -1186,6 +1187,7 @@ // CHECK_BTVER1_M64: #define __LZCNT__ 1 // CHECK_BTVER1_M64: #define __MMX__ 1 // CHECK_BTVER1_M64: #define __POPCNT__ 1 +// CHECK_BTVER1_M64: #define __PRFCHW__ 1 // CHECK_BTVER1_M64: #define __SSE2_MATH__ 1 // CHECK_BTVER1_M64: #define __SSE2__ 1 // CHECK_BTVER1_M64: #define __SSE3__ 1 @@ -1210,6 +1212,7 @@ // CHECK_BTVER2_M32: #define __LZCNT__ 1 // CHECK_BTVER2_M32: #define __MMX__ 1 // CHECK_BTVER2_M32: #define __POPCNT__ 1 +// CHECK_BTVER2_M32: #define __PRFCHW__ 1 // CHECK_BTVER2_M32: #define __SSE2_MATH__ 1 // CHECK_BTVER2_M32: #define __SSE2__ 1 // CHECK_BTVER2_M32: #define __SSE3__ 1 @@ -1232,6 +1235,7 @@ // CHECK_BTVER2_M64: #define __LZCNT__ 1 // CHECK_BTVER2_M64: #define __MMX__ 1 // CHECK_BTVER2_M64: #define __POPCNT__ 1 +// CHECK_BTVER2_M64: #define __PRFCHW__ 1 // CHECK_BTVER2_M64: #define __SSE2_MATH__ 1 // CHECK_BTVER2_M64: #define __SSE2__ 1 // CHECK_BTVER2_M64: #define __SSE3__ 1 @@ -1258,6 +1262,7 @@ // CHECK_BDVER1_M32: #define __MMX__ 1 // CHECK_BDVER1_M32: #define __PCLMUL__ 1 // CHECK_BDVER1_M32: #define __POPCNT__ 1 +// CHECK_BDVER1_M32: #define __PRFCHW__ 1 // CHECK_BDVER1_M32: #define __SSE2_MATH__ 1 // CHECK_BDVER1_M32: #define __SSE2__ 1 // CHECK_BDVER1_M32: #define __SSE3__ 1 @@ -1285,6 +1290,7 @@ // CHECK_BDVER1_M64: #define __MMX__ 1 // CHECK_BDVER1_M64: #define __PCLMUL__ 1 // CHECK_BDVER1_M64: #define __POPCNT__ 1 +// 
CHECK_BDVER1_M64: #define __PRFCHW__ 1 // CHECK_BDVER1_M64: #define __SSE2_MATH__ 1 // CHECK_BDVER1_M64: #define __SSE2__ 1 // CHECK_BDVER1_M64: #define __SSE3__ 1 @@ -1317,6 +1323,7 @@ // CHECK_BDVER2_M32: #define __MMX__ 1 // CHECK_BDVER2_M32: #define __PCLMUL__ 1 // CHECK_BDVER2_M32: #define __POPCNT__ 1 +// CHECK_BDVER2_M32: #define __PRFCHW__ 1 // CHECK_BDVER2_M32: #define __SSE2_MATH__ 1 // CHECK_BDVER2_M32: #define __SSE2__ 1 // CHECK_BDVER2_M32: #define __SSE3__ 1 @@ -1348,6 +1355,7 @@ // CHECK_BDVER2_M64: #define __MMX__ 1 // CHECK_BDVER2_M64: #define __PCLMUL__ 1 // CHECK_BDVER2_M64: #define __POPCNT__ 1 +// CHECK_BDVER2_M64: #define __PRFCHW__ 1 // CHECK_BDVER2_M64: #define __SSE2_MATH__ 1 // CHECK_BDVER2_M64: #define __SSE2__ 1 // CHECK_BDVER2_M64: #define __SSE3__ 1 diff --git a/test/Preprocessor/x86_target_features.c b/test/Preprocessor/x86_target_features.c index 40c4e6440c..751c8ae958 100644 --- a/test/Preprocessor/x86_target_features.c +++ b/test/Preprocessor/x86_target_features.c @@ -215,3 +215,24 @@ // RUN: %clang -target i386-unknown-unknown -march=pentiumpro -mcx16 -x c -E -dM -o - %s | FileCheck --check-prefix=MCX16 %s // MCX16: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_16 1 + +// RUN: %clang -target i386-unknown-unknown -march=atom -mprfchw -x c -E -dM -o - %s | FileCheck --check-prefix=PRFCHW %s + +// PRFCHW: #define __PRFCHW__ 1 + +// RUN: %clang -target i386-unknown-unknown -march=btver2 -mno-prfchw -x c -E -dM -o - %s | FileCheck --check-prefix=NOPRFCHW %s + +// NOPRFCHW-NOT: #define __PRFCHW__ 1 + +// RUN: %clang -target i386-unknown-unknown -march=atom -m3dnow -x c -E -dM -o - %s | FileCheck --check-prefix=3DNOWPRFCHW %s + +// 3DNOWPRFCHW: #define __PRFCHW__ 1 + +// RUN: %clang -target i386-unknown-unknown -march=atom -mno-prfchw -m3dnow -x c -E -dM -o - %s | FileCheck --check-prefix=3DNOWNOPRFCHW %s + +// 3DNOWNOPRFCHW-NOT: #define __PRFCHW__ 1 + +// RUN: %clang -target i386-unknown-unknown -march=atom -mprfchw -mno-3dnow -x c -E -dM -o 
- %s | FileCheck --check-prefix=NO3DNOWPRFCHW %s + +// NO3DNOWPRFCHW: #define __PRFCHW__ 1 + -- 2.40.0