From 2ee2ac2293f313dfe1c6eb7034527a92b5d23158 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 25 Jan 2012 04:26:17 +0000 Subject: [PATCH] Represent 256-bit unaligned loads natively and remove the builtins. Similar change was made for 128-bit versions a while back. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@148919 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/clang/Basic/BuiltinsX86.def | 3 --- lib/Headers/avxintrin.h | 15 ++++++++++++--- test/CodeGen/avx-builtins.c | 25 +++++++++++++++++++++++++ 3 files changed, 37 insertions(+), 6 deletions(-) create mode 100644 test/CodeGen/avx-builtins.c diff --git a/include/clang/Basic/BuiltinsX86.def b/include/clang/Basic/BuiltinsX86.def index 17a7cc63e1..2356b54d3c 100644 --- a/include/clang/Basic/BuiltinsX86.def +++ b/include/clang/Basic/BuiltinsX86.def @@ -453,11 +453,8 @@ BUILTIN(__builtin_ia32_vbroadcastsd256, "V4ddC*", "") BUILTIN(__builtin_ia32_vbroadcastss256, "V8ffC*", "") BUILTIN(__builtin_ia32_vbroadcastf128_pd256, "V4dV2dC*", "") BUILTIN(__builtin_ia32_vbroadcastf128_ps256, "V8fV4fC*", "") -BUILTIN(__builtin_ia32_loadupd256, "V4ddC*", "") -BUILTIN(__builtin_ia32_loadups256, "V8ffC*", "") BUILTIN(__builtin_ia32_storeupd256, "vd*V4d", "") BUILTIN(__builtin_ia32_storeups256, "vf*V8f", "") -BUILTIN(__builtin_ia32_loaddqu256, "V32ccC*", "") BUILTIN(__builtin_ia32_storedqu256, "vc*V32c", "") BUILTIN(__builtin_ia32_lddqu256, "V32ccC*", "") BUILTIN(__builtin_ia32_movntdq256, "vV4LLi*V4LLi", "") diff --git a/lib/Headers/avxintrin.h b/lib/Headers/avxintrin.h index 620ee0fbb0..ce4b2264bf 100644 --- a/lib/Headers/avxintrin.h +++ b/lib/Headers/avxintrin.h @@ -751,13 +751,19 @@ _mm256_load_ps(float const *p) static __inline __m256d __attribute__((__always_inline__, __nodebug__)) _mm256_loadu_pd(double const *p) { - return (__m256d)__builtin_ia32_loadupd256(p); + struct __loadu_pd { + __m256d v; + } __attribute__((packed, may_alias)); + return ((struct __loadu_pd*)p)->v; } static __inline __m256 
__attribute__((__always_inline__, __nodebug__)) _mm256_loadu_ps(float const *p) { - return (__m256)__builtin_ia32_loadups256(p); + struct __loadu_ps { + __m256 v; + } __attribute__((packed, may_alias)); + return ((struct __loadu_ps*)p)->v; } static __inline __m256i __attribute__((__always_inline__, __nodebug__)) @@ -769,7 +775,10 @@ _mm256_load_si256(__m256i const *p) static __inline __m256i __attribute__((__always_inline__, __nodebug__)) _mm256_loadu_si256(__m256i const *p) { - return (__m256i)__builtin_ia32_loaddqu256((char const *)p); + struct __loadu_si256 { + __m256i v; + } __attribute__((packed, may_alias)); + return ((struct __loadu_si256*)p)->v; } static __inline __m256i __attribute__((__always_inline__, __nodebug__)) diff --git a/test/CodeGen/avx-builtins.c b/test/CodeGen/avx-builtins.c new file mode 100644 index 0000000000..f03141225c --- /dev/null +++ b/test/CodeGen/avx-builtins.c @@ -0,0 +1,25 @@ +// RUN: %clang_cc1 %s -O3 -triple=x86_64-apple-darwin -target-feature +avx -emit-llvm -o - | FileCheck %s + +// Don't include mm_malloc.h, it's system specific. +#define __MM_MALLOC_H + +#include <immintrin.h> + +// +// Test LLVM IR codegen of shuffle instructions +// + +__m256 test__mm256_loadu_ps(void* p) { + // CHECK: load <8 x float>* %{{.*}}, align 1 + return _mm256_loadu_ps(p); +} + +__m256d test__mm256_loadu_pd(void* p) { + // CHECK: load <4 x double>* %{{.*}}, align 1 + return _mm256_loadu_pd(p); +} + +__m256i test__mm256_loadu_si256(void* p) { + // CHECK: load <4 x i64>* %0, align 1 + return _mm256_loadu_si256(p); +} -- 2.40.0