From afb075bfdb5cafe1edb40a3508f74b56d5b6fc4b Mon Sep 17 00:00:00 2001
From: Chandler Carruth <chandlerc@gmail.com>
Date: Wed, 10 Aug 2016 07:32:47 +0000
Subject: [PATCH] [x86] Fix a really nasty bug introduced in r276417 where
 alignment constraints were added to _mm256_broadcast_{pd,ps} intel
 intrinsics.

The spec for these intrinsics is ... pretty much silent on alignment.
This is especially frustrating considering the amount of discussion of
alignment in the load and store intrinsics. So I was forced to rely on
the specification for the VBROADCASTF128 instruction.

That instruction's spec is *also* completely silent on alignment.
Fortunately, when it comes to the instruction's spec, silence is enough.
There is no #GP fault option for an underaligned address, so this
instruction, and by inference the intrinsic, can read from any alignment.

As it happens, the old code worked exactly this way and in fact we have
plenty of code that hands pointers with less than 16-byte alignment to
these intrinsics. This code broke pretty spectacularly with this commit.

Fortunately, the fix is super simple! Change a 16 to a 1, and ta da!

Anyways, a lot of debugging for a really boring fix. =]

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@278202 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/CGBuiltin.cpp   | 2 +-
 test/CodeGen/avx-builtins.c | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp
index 5f47cb4e3d..87a825d46a 100644
--- a/lib/CodeGen/CGBuiltin.cpp
+++ b/lib/CodeGen/CGBuiltin.cpp
@@ -7020,7 +7020,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
   case X86::BI__builtin_ia32_vbroadcastf128_pd256:
   case X86::BI__builtin_ia32_vbroadcastf128_ps256: {
     llvm::Type *DstTy = ConvertType(E->getType());
-    return EmitX86SubVectorBroadcast(*this, Ops, DstTy, 128, 16);
+    return EmitX86SubVectorBroadcast(*this, Ops, DstTy, 128, 1);
   }
 
   case X86::BI__builtin_ia32_storehps:
diff --git a/test/CodeGen/avx-builtins.c b/test/CodeGen/avx-builtins.c
index d7e6a0beb7..8851181f56 100644
--- a/test/CodeGen/avx-builtins.c
+++ b/test/CodeGen/avx-builtins.c
@@ -84,14 +84,14 @@ __m256 test_mm256_blendv_ps(__m256 V1, __m256 V2, __m256 V3) {
 
 __m256d test_mm256_broadcast_pd(__m128d* A) {
   // CHECK-LABEL: test_mm256_broadcast_pd
-  // CHECK: load <2 x double>, <2 x double>* %{{.*}}, align 16
+  // CHECK: load <2 x double>, <2 x double>* %{{.*}}, align 1
   // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
   return _mm256_broadcast_pd(A);
 }
 
 __m256 test_mm256_broadcast_ps(__m128* A) {
   // CHECK-LABEL: test_mm256_broadcast_ps
-  // CHECK: load <4 x float>, <4 x float>* %{{.*}}, align 16
+  // CHECK: load <4 x float>, <4 x float>* %{{.*}}, align 1
   // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
   return _mm256_broadcast_ps(A);
 }
-- 
2.40.0