granicus.if.org Git - clang/commitdiff
Lower _mm256_broadcastsi128_si256 directly to a vector shuffle.
author: Juergen Ributzka <juergen@apple.com>
Tue, 3 Mar 2015 17:22:53 +0000 (17:22 +0000)
committer: Juergen Ributzka <juergen@apple.com>
Tue, 3 Mar 2015 17:22:53 +0000 (17:22 +0000)
Originally we lowered this AVX2 vector intrinsic through the same builtin that
GCC uses. We now lower it directly to a vector shuffle instead.

This will not only allow LLVM to generate better code, but it will also allow us
to remove the GCC intrinsics.

Reviewed by Andrea

This is related to rdar://problem/18742778.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@231081 91177308-0d34-0410-b5e6-96231b3b80d8

include/clang/Basic/BuiltinsX86.def
lib/CodeGen/CGBuiltin.cpp
lib/Headers/avx2intrin.h
test/CodeGen/avx2-builtins.c

index ce8dced1bc00299b6039417f06f1877a3798b5c3..acf2eb6acbb6307867591e059a5fdfa47825d005 100644 (file)
@@ -582,7 +582,6 @@ BUILTIN(__builtin_ia32_movntdqa256, "V4LLiV4LLi*", "")
 BUILTIN(__builtin_ia32_vbroadcastss_ps, "V4fV4f", "")
 BUILTIN(__builtin_ia32_vbroadcastss_ps256, "V8fV4f", "")
 BUILTIN(__builtin_ia32_vbroadcastsd_pd256, "V4dV2d", "")
-BUILTIN(__builtin_ia32_vbroadcastsi256, "V4LLiV2LLi", "")
 BUILTIN(__builtin_ia32_pbroadcastb256, "V32cV16c", "")
 BUILTIN(__builtin_ia32_pbroadcastw256, "V16sV8s", "")
 BUILTIN(__builtin_ia32_pbroadcastd256, "V8iV4i", "")
index bf7d86fb12fa2f1421b2e66288891045e3948399..d3d1e22b4bb1a40f4717eb2e72ba5f23b184475a 100644 (file)
@@ -6090,13 +6090,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
     Builder.CreateStore(Builder.CreateExtractValue(Call, 0), Ops[0]);
     return Builder.CreateExtractValue(Call, 1);
   }
-  // AVX2 broadcast
-  case X86::BI__builtin_ia32_vbroadcastsi256: {
-    Value *VecTmp = CreateMemTemp(E->getArg(0)->getType());
-    Builder.CreateStore(Ops[0], VecTmp);
-    Value *F = CGM.getIntrinsic(Intrinsic::x86_avx2_vbroadcasti128);
-    return Builder.CreateCall(F, Builder.CreateBitCast(VecTmp, Int8PtrTy));
-  }
   // SSE comparison intrisics
   case X86::BI__builtin_ia32_cmpeqps:
   case X86::BI__builtin_ia32_cmpltps:
index 394fdfee96522ca4b4cd5cfe44f7a97f1b82f006..af43bec0efdd0496c544e1f13c5565502a727cd4 100644 (file)
@@ -771,7 +771,7 @@ _mm256_broadcastsd_pd(__m128d __X)
 static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
 _mm256_broadcastsi128_si256(__m128i __X)
 {
-  return (__m256i)__builtin_ia32_vbroadcastsi256(__X);
+  return (__m256i)__builtin_shufflevector(__X, __X, 0, 1, 0, 1);
 }
 
 #define _mm_blend_epi32(V1, V2, M) __extension__ ({ \
index 27ee91e4ada24451dd2a4929d1778be229049e66..371f9c6ee24c8a3aafad92221a0b98108018b185 100644 (file)
@@ -612,7 +612,7 @@ __m256d test_mm256_broadcastsd_pd(__m128d a) {
 }
 
 __m256i test_mm256_broadcastsi128_si256(__m128i a) {
-  // CHECK: @llvm.x86.avx2.vbroadcasti128
+  // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
   return _mm256_broadcastsi128_si256(a);
 }