From f7558e5102a025063b375fa1b9d0d78a15713fc1 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 10 Aug 2017 20:28:30 +0000 Subject: [PATCH] [X86] Implement __builtin_cpu_is This patch adds support for __builtin_cpu_is. I've tried to match the strings supported to the latest version of gcc. Differential Revision: https://reviews.llvm.org/D35449 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@310657 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/clang/Basic/BuiltinsX86.def | 1 + include/clang/Basic/DiagnosticSemaKinds.td | 1 + include/clang/Basic/TargetInfo.h | 4 + lib/Basic/Targets/X86.cpp | 38 +++++++ lib/Basic/Targets/X86.h | 2 + lib/CodeGen/CGBuiltin.cpp | 117 +++++++++++++++++++++ lib/Sema/SemaChecking.cpp | 23 ++++ test/CodeGen/builtin-cpu-is.c | 53 ++++++++++ test/CodeGen/target-builtin-noerror.c | 32 ++++++ test/Sema/builtin-cpu-supports.c | 6 ++ 10 files changed, 277 insertions(+) create mode 100644 test/CodeGen/builtin-cpu-is.c diff --git a/include/clang/Basic/BuiltinsX86.def b/include/clang/Basic/BuiltinsX86.def index 6d3a478ac3..af9796e54b 100644 --- a/include/clang/Basic/BuiltinsX86.def +++ b/include/clang/Basic/BuiltinsX86.def @@ -33,6 +33,7 @@ // TODO: Make this somewhat generic so that other backends // can use it? 
BUILTIN(__builtin_cpu_supports, "bcC*", "nc") +BUILTIN(__builtin_cpu_is, "bcC*", "nc") // Undefined Values // diff --git a/include/clang/Basic/DiagnosticSemaKinds.td b/include/clang/Basic/DiagnosticSemaKinds.td index 8dfd0eb89a..ef74dea811 100644 --- a/include/clang/Basic/DiagnosticSemaKinds.td +++ b/include/clang/Basic/DiagnosticSemaKinds.td @@ -583,6 +583,7 @@ def warn_redecl_library_builtin : Warning< def err_builtin_definition : Error<"definition of builtin function %0">; def err_arm_invalid_specialreg : Error<"invalid special register for builtin">; def err_invalid_cpu_supports : Error<"invalid cpu feature string for builtin">; +def err_invalid_cpu_is : Error<"invalid cpu name for builtin">; def err_builtin_needs_feature : Error<"%0 needs target feature %1">; def err_function_needs_feature : Error<"always_inline function %1 requires target feature '%2', but would " diff --git a/include/clang/Basic/TargetInfo.h b/include/clang/Basic/TargetInfo.h index 5e3cf0b315..de5562ce7c 100644 --- a/include/clang/Basic/TargetInfo.h +++ b/include/clang/Basic/TargetInfo.h @@ -911,6 +911,10 @@ public: // argument. virtual bool validateCpuSupports(StringRef Name) const { return false; } + // \brief Validate the contents of the __builtin_cpu_is(const char*) + // argument. + virtual bool validateCpuIs(StringRef Name) const { return false; } + // \brief Returns maximal number of args passed in registers. 
unsigned getRegParmMax() const { assert(RegParmMax < 7 && "RegParmMax value is larger than AST can handle"); diff --git a/lib/Basic/Targets/X86.cpp b/lib/Basic/Targets/X86.cpp index 5c48850cb4..fc0c9c85ef 100644 --- a/lib/Basic/Targets/X86.cpp +++ b/lib/Basic/Targets/X86.cpp @@ -1307,6 +1307,44 @@ bool X86TargetInfo::validateCpuSupports(StringRef FeatureStr) const { .Default(false); } +// We can't use a generic validation scheme for the cpus accepted here +// versus subtarget cpus accepted in the target attribute because the +// variables initialized by the runtime only support the below currently +// rather than the full range of cpus. +bool X86TargetInfo::validateCpuIs(StringRef FeatureStr) const { + return llvm::StringSwitch(FeatureStr) + .Case("amd", true) + .Case("amdfam10h", true) + .Case("amdfam15h", true) + .Case("atom", true) + .Case("barcelona", true) + .Case("bdver1", true) + .Case("bdver2", true) + .Case("bdver3", true) + .Case("bdver4", true) + .Case("bonnell", true) + .Case("broadwell", true) + .Case("btver1", true) + .Case("btver2", true) + .Case("core2", true) + .Case("corei7", true) + .Case("haswell", true) + .Case("intel", true) + .Case("istanbul", true) + .Case("ivybridge", true) + .Case("knl", true) + .Case("nehalem", true) + .Case("sandybridge", true) + .Case("shanghai", true) + .Case("silvermont", true) + .Case("skylake", true) + .Case("skylake-avx512", true) + .Case("slm", true) + .Case("westmere", true) + .Case("znver1", true) + .Default(false); +} + bool X86TargetInfo::validateAsmConstraint( const char *&Name, TargetInfo::ConstraintInfo &Info) const { switch (*Name) { diff --git a/lib/Basic/Targets/X86.h b/lib/Basic/Targets/X86.h index 671b8c9c17..34c7bdfbe3 100644 --- a/lib/Basic/Targets/X86.h +++ b/lib/Basic/Targets/X86.h @@ -382,6 +382,8 @@ public: bool validateCpuSupports(StringRef Name) const override; + bool validateCpuIs(StringRef Name) const override; + bool validateAsmConstraint(const char *&Name, TargetInfo::ConstraintInfo &info) 
const override; diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp index 1a9ff26d83..2c8a6e0737 100644 --- a/lib/CodeGen/CGBuiltin.cpp +++ b/lib/CodeGen/CGBuiltin.cpp @@ -7287,8 +7287,125 @@ static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op, return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2"); } +static Value *EmitX86CpuIs(CodeGenFunction &CGF, const CallExpr *E) { + const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts(); + StringRef CPUStr = cast(CPUExpr)->getString(); + + // This enum contains the vendor, type, and subtype enums from the + // runtime library concatenated together. The _START labels mark + // the start and are used to adjust the value into the correct + // encoding space. + enum X86CPUs { + INTEL = 1, + AMD, + CPU_TYPE_START, + INTEL_BONNELL, + INTEL_CORE2, + INTEL_COREI7, + AMDFAM10H, + AMDFAM15H, + INTEL_SILVERMONT, + INTEL_KNL, + AMD_BTVER1, + AMD_BTVER2, + CPU_SUBTYPE_START, + INTEL_COREI7_NEHALEM, + INTEL_COREI7_WESTMERE, + INTEL_COREI7_SANDYBRIDGE, + AMDFAM10H_BARCELONA, + AMDFAM10H_SHANGHAI, + AMDFAM10H_ISTANBUL, + AMDFAM15H_BDVER1, + AMDFAM15H_BDVER2, + AMDFAM15H_BDVER3, + AMDFAM15H_BDVER4, + AMDFAM17H_ZNVER1, + INTEL_COREI7_IVYBRIDGE, + INTEL_COREI7_HASWELL, + INTEL_COREI7_BROADWELL, + INTEL_COREI7_SKYLAKE, + INTEL_COREI7_SKYLAKE_AVX512, + }; + + X86CPUs CPU = + StringSwitch(CPUStr) + .Case("amd", AMD) + .Case("amdfam10h", AMDFAM10H) + .Case("amdfam15h", AMDFAM15H) + .Case("atom", INTEL_BONNELL) + .Case("barcelona", AMDFAM10H_BARCELONA) + .Case("bdver1", AMDFAM15H_BDVER1) + .Case("bdver2", AMDFAM15H_BDVER2) + .Case("bdver3", AMDFAM15H_BDVER3) + .Case("bdver4", AMDFAM15H_BDVER4) + .Case("bonnell", INTEL_BONNELL) + .Case("broadwell", INTEL_COREI7_BROADWELL) + .Case("btver1", AMD_BTVER1) + .Case("btver2", AMD_BTVER2) + .Case("core2", INTEL_CORE2) + .Case("corei7", INTEL_COREI7) + .Case("haswell", INTEL_COREI7_HASWELL) + .Case("intel", INTEL) + .Case("istanbul", AMDFAM10H_ISTANBUL) + .Case("ivybridge", 
INTEL_COREI7_IVYBRIDGE) + .Case("knl", INTEL_KNL) + .Case("nehalem", INTEL_COREI7_NEHALEM) + .Case("sandybridge", INTEL_COREI7_SANDYBRIDGE) + .Case("shanghai", AMDFAM10H_SHANGHAI) + .Case("silvermont", INTEL_SILVERMONT) + .Case("skylake", INTEL_COREI7_SKYLAKE) + .Case("skylake-avx512", INTEL_COREI7_SKYLAKE_AVX512) + .Case("slm", INTEL_SILVERMONT) + .Case("westmere", INTEL_COREI7_WESTMERE) + .Case("znver1", AMDFAM17H_ZNVER1); + + llvm::Type *Int32Ty = CGF.Builder.getInt32Ty(); + + // Matching the struct layout from the compiler-rt/libgcc structure that is + // filled in: + // unsigned int __cpu_vendor; + // unsigned int __cpu_type; + // unsigned int __cpu_subtype; + // unsigned int __cpu_features[1]; + llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty, + llvm::ArrayType::get(Int32Ty, 1)); + + // Grab the global __cpu_model. + llvm::Constant *CpuModel = CGF.CGM.CreateRuntimeVariable(STy, "__cpu_model"); + + // Calculate the index needed to access the correct field based on the + // range. Also adjust the expected value. + unsigned Index; + unsigned Value; + if (CPU > CPU_SUBTYPE_START) { + Index = 2; + Value = CPU - CPU_SUBTYPE_START; + } else if (CPU > CPU_TYPE_START) { + Index = 1; + Value = CPU - CPU_TYPE_START; + } else { + Index = 0; + Value = CPU; + } + + // Grab the appropriate field from __cpu_model. + llvm::Value *Idxs[] = { + ConstantInt::get(Int32Ty, 0), + ConstantInt::get(Int32Ty, Index) + }; + llvm::Value *CpuValue = CGF.Builder.CreateGEP(STy, CpuModel, Idxs); + CpuValue = CGF.Builder.CreateAlignedLoad(CpuValue, CharUnits::fromQuantity(4)); + + // Check the value of the field against the requested value. 
+ return CGF.Builder.CreateICmpEQ(CpuValue, + llvm::ConstantInt::get(Int32Ty, Value)); +} + Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E) { + if (BuiltinID == X86::BI__builtin_cpu_is) + return EmitX86CpuIs(*this, E); + SmallVector Ops; // Find out if any arguments are required to be integer constant expressions. diff --git a/lib/Sema/SemaChecking.cpp b/lib/Sema/SemaChecking.cpp index 0112bffa29..e75c15a37c 100644 --- a/lib/Sema/SemaChecking.cpp +++ b/lib/Sema/SemaChecking.cpp @@ -1841,6 +1841,26 @@ static bool SemaBuiltinCpuSupports(Sema &S, CallExpr *TheCall) { return false; } +/// SemaBuiltinCpuIs - Handle __builtin_cpu_is(char *). +/// This checks that the target supports __builtin_cpu_is and +/// that the string argument is constant and valid. +static bool SemaBuiltinCpuIs(Sema &S, CallExpr *TheCall) { + Expr *Arg = TheCall->getArg(0); + + // Check if the argument is a string literal. + if (!isa(Arg->IgnoreParenImpCasts())) + return S.Diag(TheCall->getLocStart(), diag::err_expr_not_string_literal) + << Arg->getSourceRange(); + + // Check the contents of the string. + StringRef Feature = + cast(Arg->IgnoreParenImpCasts())->getString(); + if (!S.Context.getTargetInfo().validateCpuIs(Feature)) + return S.Diag(TheCall->getLocStart(), diag::err_invalid_cpu_is) + << Arg->getSourceRange(); + return false; +} + // Check if the rounding mode is legal. bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) { // Indicates if this instruction has rounding control or just SAE. @@ -2154,6 +2174,9 @@ bool Sema::CheckX86BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { if (BuiltinID == X86::BI__builtin_cpu_supports) return SemaBuiltinCpuSupports(*this, TheCall); + if (BuiltinID == X86::BI__builtin_cpu_is) + return SemaBuiltinCpuIs(*this, TheCall); + // If the intrinsic has rounding or SAE make sure its valid. 
if (CheckX86BuiltinRoundingOrSAE(BuiltinID, TheCall)) return true; diff --git a/test/CodeGen/builtin-cpu-is.c b/test/CodeGen/builtin-cpu-is.c new file mode 100644 index 0000000000..f2a5f54a0c --- /dev/null +++ b/test/CodeGen/builtin-cpu-is.c @@ -0,0 +1,53 @@ +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -emit-llvm < %s| FileCheck %s + +// Test that we have the structure definition, the gep offsets, the name of the +// global, the bit grab, and the icmp correct. +extern void a(const char *); + +void intel() { + if (__builtin_cpu_is("intel")) + a("intel"); + + // CHECK: [[LOAD:%[^ ]+]] = load i32, i32* getelementptr inbounds ({ i32, i32, i32, [1 x i32] }, { i32, i32, i32, [1 x i32] }* @__cpu_model, i32 0, i32 0) + // CHECK: = icmp eq i32 [[LOAD]], 1 +} + +void amd() { + if (__builtin_cpu_is("amd")) + a("amd"); + + // CHECK: [[LOAD:%[^ ]+]] = load i32, i32* getelementptr inbounds ({ i32, i32, i32, [1 x i32] }, { i32, i32, i32, [1 x i32] }* @__cpu_model, i32 0, i32 0) + // CHECK: = icmp eq i32 [[LOAD]], 2 +} + +void atom() { + if (__builtin_cpu_is("atom")) + a("atom"); + + // CHECK: [[LOAD:%[^ ]+]] = load i32, i32* getelementptr inbounds ({ i32, i32, i32, [1 x i32] }, { i32, i32, i32, [1 x i32] }* @__cpu_model, i32 0, i32 1) + // CHECK: = icmp eq i32 [[LOAD]], 1 +} + +void amdfam10h() { + if (__builtin_cpu_is("amdfam10h")) + a("amdfam10h"); + + // CHECK: [[LOAD:%[^ ]+]] = load i32, i32* getelementptr inbounds ({ i32, i32, i32, [1 x i32] }, { i32, i32, i32, [1 x i32] }* @__cpu_model, i32 0, i32 1) + // CHECK: = icmp eq i32 [[LOAD]], 4 +} + +void barcelona() { + if (__builtin_cpu_is("barcelona")) + a("barcelona"); + + // CHECK: [[LOAD:%[^ ]+]] = load i32, i32* getelementptr inbounds ({ i32, i32, i32, [1 x i32] }, { i32, i32, i32, [1 x i32] }* @__cpu_model, i32 0, i32 2) + // CHECK: = icmp eq i32 [[LOAD]], 4 +} + +void nehalem() { + if (__builtin_cpu_is("nehalem")) + a("nehalem"); + + // CHECK: [[LOAD:%[^ ]+]] = load i32, i32* getelementptr inbounds ({ i32, i32, i32, [1 
x i32] }, { i32, i32, i32, [1 x i32] }* @__cpu_model, i32 0, i32 2) + // CHECK: = icmp eq i32 [[LOAD]], 1 +} diff --git a/test/CodeGen/target-builtin-noerror.c b/test/CodeGen/target-builtin-noerror.c index dcc77d77bf..fcf9bc945d 100644 --- a/test/CodeGen/target-builtin-noerror.c +++ b/test/CodeGen/target-builtin-noerror.c @@ -76,3 +76,35 @@ void verifyfeaturestrings() { (void)__builtin_cpu_supports("avx5124fmaps"); (void)__builtin_cpu_supports("avx512vpopcntdq"); } + +void verifycpustrings() { + (void)__builtin_cpu_is("amd"); + (void)__builtin_cpu_is("amdfam10h"); + (void)__builtin_cpu_is("amdfam15h"); + (void)__builtin_cpu_is("atom"); + (void)__builtin_cpu_is("barcelona"); + (void)__builtin_cpu_is("bdver1"); + (void)__builtin_cpu_is("bdver2"); + (void)__builtin_cpu_is("bdver3"); + (void)__builtin_cpu_is("bdver4"); + (void)__builtin_cpu_is("bonnell"); + (void)__builtin_cpu_is("broadwell"); + (void)__builtin_cpu_is("btver1"); + (void)__builtin_cpu_is("btver2"); + (void)__builtin_cpu_is("core2"); + (void)__builtin_cpu_is("corei7"); + (void)__builtin_cpu_is("haswell"); + (void)__builtin_cpu_is("intel"); + (void)__builtin_cpu_is("istanbul"); + (void)__builtin_cpu_is("ivybridge"); + (void)__builtin_cpu_is("knl"); + (void)__builtin_cpu_is("nehalem"); + (void)__builtin_cpu_is("sandybridge"); + (void)__builtin_cpu_is("shanghai"); + (void)__builtin_cpu_is("silvermont"); + (void)__builtin_cpu_is("skylake"); + (void)__builtin_cpu_is("skylake-avx512"); + (void)__builtin_cpu_is("slm"); + (void)__builtin_cpu_is("westmere"); + (void)__builtin_cpu_is("znver1"); +} diff --git a/test/Sema/builtin-cpu-supports.c b/test/Sema/builtin-cpu-supports.c index c537b4140b..026b5b7e38 100644 --- a/test/Sema/builtin-cpu-supports.c +++ b/test/Sema/builtin-cpu-supports.c @@ -12,9 +12,15 @@ int main() { if (__builtin_cpu_supports(str)) // expected-error {{expression is not a string literal}} a(str); + + if (__builtin_cpu_is("int")) // expected-error {{invalid cpu name for builtin}} + a("intel"); 
#else if (__builtin_cpu_supports("vsx")) // expected-error {{use of unknown builtin}} a("vsx"); + + if (__builtin_cpu_is("pwr9")) // expected-error {{use of unknown builtin}} + a("pwr9"); #endif return 0; -- 2.40.0