From 5c5aea8028aa58b347eaf3b552bc158a2a85ee18 Mon Sep 17 00:00:00 2001
From: Wei Mi
Date: Fri, 22 Sep 2017 16:30:00 +0000
Subject: [PATCH] [Atomic][X8664] set max atomic inline width according to the target

This is to fix PR31620. MaxAtomicInlineWidth is set to 128 for x86_64. However
for target without cx16 support, 128 atomic operation will generate __sync_*
libcalls. The patch set MaxAtomicInlineWidth to 64 if the target doesn't support
cx16.

Differential Revision: https://reviews.llvm.org/D38046

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@313992 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/clang/Basic/TargetInfo.h       |  3 ++
 lib/Basic/Targets.cpp                  |  1 +
 lib/Basic/Targets/X86.h                |  8 ++-
 test/CodeGenCXX/atomic-inline.cpp      | 69 ++++++++++++++++++++++++++
 test/OpenMP/atomic_capture_codegen.cpp |  6 +--
 test/OpenMP/atomic_read_codegen.c      |  6 +--
 test/OpenMP/atomic_update_codegen.cpp  |  6 +--
 test/OpenMP/atomic_write_codegen.c     |  6 +--
 8 files changed, 92 insertions(+), 13 deletions(-)
 create mode 100644 test/CodeGenCXX/atomic-inline.cpp

diff --git a/include/clang/Basic/TargetInfo.h b/include/clang/Basic/TargetInfo.h
index 8bf31b075f..38a7bfed87 100644
--- a/include/clang/Basic/TargetInfo.h
+++ b/include/clang/Basic/TargetInfo.h
@@ -448,6 +448,9 @@ public:
   /// \brief Return the maximum width lock-free atomic operation which can be
   /// inlined given the supported features of the given target.
   unsigned getMaxAtomicInlineWidth() const { return MaxAtomicInlineWidth; }
+  /// \brief Set the maximum inline or promote width lock-free atomic operation
+  /// for the given target.
+  virtual void setMaxAtomicWidth() {}
   /// \brief Returns true if the given target supports lock-free atomic
   /// operations at the specified width and alignment.
   virtual bool hasBuiltinAtomic(uint64_t AtomicSizeInBits,
diff --git a/lib/Basic/Targets.cpp b/lib/Basic/Targets.cpp
index 3c1c08bb05..a9a5f4ddcf 100644
--- a/lib/Basic/Targets.cpp
+++ b/lib/Basic/Targets.cpp
@@ -620,6 +620,7 @@ TargetInfo::CreateTargetInfo(DiagnosticsEngine &Diags,
 
   Target->setSupportedOpenCLOpts();
   Target->setOpenCLExtensionOpts();
+  Target->setMaxAtomicWidth();
 
   if (!Target->validateTarget(Diags))
     return nullptr;
diff --git a/lib/Basic/Targets/X86.h b/lib/Basic/Targets/X86.h
index 21ae4ff4e6..a08eeec59d 100644
--- a/lib/Basic/Targets/X86.h
+++ b/lib/Basic/Targets/X86.h
@@ -814,7 +814,7 @@ public:
 
     // x86-64 has atomics up to 16 bytes.
     MaxAtomicPromoteWidth = 128;
-    MaxAtomicInlineWidth = 128;
+    MaxAtomicInlineWidth = 64;
   }
 
   BuiltinVaListKind getBuiltinVaListKind() const override {
@@ -872,6 +872,12 @@ public:
                                                          HasSizeMismatch);
   }
 
+  void setMaxAtomicWidth() override {
+    if (hasFeature("cx16"))
+      MaxAtomicInlineWidth = 128;
+    return;
+  }
+
   ArrayRef<Builtin::Info> getTargetBuiltins() const override;
 };
 
diff --git a/test/CodeGenCXX/atomic-inline.cpp b/test/CodeGenCXX/atomic-inline.cpp
new file mode 100644
index 0000000000..fe727589d2
--- /dev/null
+++ b/test/CodeGenCXX/atomic-inline.cpp
@@ -0,0 +1,69 @@
+// RUN: %clang_cc1 %s -std=c++11 -emit-llvm -o - -triple=x86_64-linux-gnu | FileCheck %s
+// RUN: %clang_cc1 %s -std=c++11 -emit-llvm -o - -triple=x86_64-linux-gnu -target-cpu core2 | FileCheck %s --check-prefix=CORE2
+// Check the atomic code generation for cpu targets w/wo cx16 support.
+
+struct alignas(8) AM8 {
+  int f1, f2;
+};
+AM8 m8;
+AM8 load8() {
+  AM8 am;
+  // CHECK-LABEL: @_Z5load8v
+  // CHECK: load atomic i64, {{.*}} monotonic
+  // CORE2-LABEL: @_Z5load8v
+  // CORE2: load atomic i64, {{.*}} monotonic
+  __atomic_load(&m8, &am, 0);
+  return am;
+}
+
+AM8 s8;
+void store8() {
+  // CHECK-LABEL: @_Z6store8v
+  // CHECK: store atomic i64 {{.*}} monotonic
+  // CORE2-LABEL: @_Z6store8v
+  // CORE2: store atomic i64 {{.*}} monotonic
+  __atomic_store(&m8, &s8, 0);
+}
+
+bool cmpxchg8() {
+  AM8 am;
+  // CHECK-LABEL: @_Z8cmpxchg8v
+  // CHECK: cmpxchg i64* {{.*}} monotonic
+  // CORE2-LABEL: @_Z8cmpxchg8v
+  // CORE2: cmpxchg i64* {{.*}} monotonic
+  return __atomic_compare_exchange(&m8, &s8, &am, 0, 0, 0);
+}
+
+struct alignas(16) AM16 {
+  long f1, f2;
+};
+
+AM16 m16;
+AM16 load16() {
+  AM16 am;
+  // CHECK-LABEL: @_Z6load16v
+  // CHECK: call void @__atomic_load
+  // CORE2-LABEL: @_Z6load16v
+  // CORE2: load atomic i128, {{.*}} monotonic
+  __atomic_load(&m16, &am, 0);
+  return am;
+}
+
+AM16 s16;
+void store16() {
+  // CHECK-LABEL: @_Z7store16v
+  // CHECK: call void @__atomic_store
+  // CORE2-LABEL: @_Z7store16v
+  // CORE2: store atomic i128 {{.*}} monotonic
+  __atomic_store(&m16, &s16, 0);
+}
+
+bool cmpxchg16() {
+  AM16 am;
+  // CHECK-LABEL: @_Z9cmpxchg16v
+  // CHECK: call zeroext i1 @__atomic_compare_exchange
+  // CORE2-LABEL: @_Z9cmpxchg16v
+  // CORE2: cmpxchg i128* {{.*}} monotonic
+  return __atomic_compare_exchange(&m16, &s16, &am, 0, 0, 0);
+}
+
diff --git a/test/OpenMP/atomic_capture_codegen.cpp b/test/OpenMP/atomic_capture_codegen.cpp
index 72ecdf89ef..306b83f624 100644
--- a/test/OpenMP/atomic_capture_codegen.cpp
+++ b/test/OpenMP/atomic_capture_codegen.cpp
@@ -1,6 +1,6 @@
-// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c -emit-llvm %s -o - | FileCheck %s
-// RUN: %clang_cc1 -fopenmp -x c -triple x86_64-apple-darwin10 -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -target-cpu core2 -fopenmp -x c -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c -triple x86_64-apple-darwin10 -target-cpu core2 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c -triple x86_64-apple-darwin10 -target-cpu core2 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
diff --git a/test/OpenMP/atomic_read_codegen.c b/test/OpenMP/atomic_read_codegen.c
index 0cd46e3821..0cfb2d26f2 100644
--- a/test/OpenMP/atomic_read_codegen.c
+++ b/test/OpenMP/atomic_read_codegen.c
@@ -1,6 +1,6 @@
-// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c -emit-llvm %s -o - | FileCheck %s
-// RUN: %clang_cc1 -fopenmp -x c -triple x86_64-apple-darwin10 -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -target-cpu core2 -fopenmp -x c -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c -triple x86_64-apple-darwin10 -target-cpu core2 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c -triple x86_64-apple-darwin10 -target-cpu core2 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
 // expected-no-diagnostics
 // REQUIRES: x86-registered-target
 #ifndef HEADER
diff --git a/test/OpenMP/atomic_update_codegen.cpp b/test/OpenMP/atomic_update_codegen.cpp
index 367567183a..1343cd8ad2 100644
--- a/test/OpenMP/atomic_update_codegen.cpp
+++ b/test/OpenMP/atomic_update_codegen.cpp
@@ -1,6 +1,6 @@
-// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c -emit-llvm %s -o - | FileCheck %s
-// RUN: %clang_cc1 -fopenmp -x c -triple x86_64-apple-darwin10 -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -target-cpu core2 -fopenmp -x c -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c -triple x86_64-apple-darwin10 -target-cpu core2 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c -triple x86_64-apple-darwin10 -target-cpu core2 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
diff --git a/test/OpenMP/atomic_write_codegen.c b/test/OpenMP/atomic_write_codegen.c
index 050d7a5105..0c85b6e88a 100644
--- a/test/OpenMP/atomic_write_codegen.c
+++ b/test/OpenMP/atomic_write_codegen.c
@@ -1,6 +1,6 @@
-// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c -emit-llvm %s -o - | FileCheck %s
-// RUN: %clang_cc1 -fopenmp -x c -triple x86_64-apple-darwin10 -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -target-cpu core2 -fopenmp -x c -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c -triple x86_64-apple-darwin10 -target-cpu core2 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c -triple x86_64-apple-darwin10 -target-cpu core2 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
 // expected-no-diagnostics
 // REQUIRES: x86-registered-target
 #ifndef HEADER
-- 
2.40.0