TARGET_BUILTIN(__builtin_ia32_cmpps256, "V8fV8fV8fIc", "nc", "avx")
TARGET_BUILTIN(__builtin_ia32_cmpsd, "V2dV2dV2dIc", "nc", "avx")
TARGET_BUILTIN(__builtin_ia32_cmpss, "V4fV4fV4fIc", "nc", "avx")
+TARGET_BUILTIN(__builtin_ia32_vextractf128_pd256, "V2dV4dIc", "nc", "avx")
+TARGET_BUILTIN(__builtin_ia32_vextractf128_ps256, "V4fV8fIc", "nc", "avx")
+TARGET_BUILTIN(__builtin_ia32_vextractf128_si256, "V4iV8iIc", "nc", "avx")
TARGET_BUILTIN(__builtin_ia32_cvtpd2ps256, "V4fV4d", "nc", "avx")
TARGET_BUILTIN(__builtin_ia32_cvtps2dq256, "V8iV8f", "nc", "avx")
TARGET_BUILTIN(__builtin_ia32_cvttpd2dq256, "V4iV4d", "nc", "avx")
TARGET_BUILTIN(__builtin_ia32_vpermilps, "V4fV4fIi", "nc", "avx")
TARGET_BUILTIN(__builtin_ia32_vpermilpd256, "V4dV4dIi", "nc", "avx")
TARGET_BUILTIN(__builtin_ia32_vpermilps256, "V8fV8fIi", "nc", "avx")
+TARGET_BUILTIN(__builtin_ia32_vinsertf128_pd256, "V4dV4dV2dIc", "nc", "avx")
+TARGET_BUILTIN(__builtin_ia32_vinsertf128_ps256, "V8fV8fV4fIc", "nc", "avx")
+TARGET_BUILTIN(__builtin_ia32_vinsertf128_si256, "V8iV8iV4iIc", "nc", "avx")
TARGET_BUILTIN(__builtin_ia32_sqrtpd256, "V4dV4d", "nc", "avx")
TARGET_BUILTIN(__builtin_ia32_sqrtps256, "V8fV8f", "nc", "avx")
TARGET_BUILTIN(__builtin_ia32_rsqrtps256, "V8fV8f", "nc", "avx")
TARGET_BUILTIN(__builtin_ia32_permvarsi256, "V8iV8iV8i", "nc", "avx2")
TARGET_BUILTIN(__builtin_ia32_permvarsf256, "V8fV8fV8i", "nc", "avx2")
TARGET_BUILTIN(__builtin_ia32_permti256, "V4LLiV4LLiV4LLiIc", "nc", "avx2")
+TARGET_BUILTIN(__builtin_ia32_extract128i256, "V2LLiV4LLiIc", "nc", "avx2")
+TARGET_BUILTIN(__builtin_ia32_insert128i256, "V4LLiV4LLiV2LLiIc", "nc", "avx2")
TARGET_BUILTIN(__builtin_ia32_maskloadd256, "V8iV8iC*V8i", "n", "avx2")
TARGET_BUILTIN(__builtin_ia32_maskloadq256, "V4LLiV4LLiC*V4LLi", "n", "avx2")
TARGET_BUILTIN(__builtin_ia32_maskloadd, "V4iV4iC*V4i", "n", "avx2")
TARGET_BUILTIN(__builtin_ia32_alignd256, "V8iV8iV8iIi", "nc", "avx512vl")
TARGET_BUILTIN(__builtin_ia32_alignq128, "V2LLiV2LLiV2LLiIi", "nc", "avx512vl")
TARGET_BUILTIN(__builtin_ia32_alignq256, "V4LLiV4LLiV4LLiIi", "nc", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_extractf64x4, "V4dV8dIi", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_extractf32x4, "V4fV16fIi", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_vpdpbusd128, "V4iV4iV4iV4i", "nc", "avx512vl,avx512vnni")
TARGET_BUILTIN(__builtin_ia32_vpdpbusd256, "V8iV8iV8iV8i", "nc", "avx512vl,avx512vnni")
TARGET_BUILTIN(__builtin_ia32_pmovqw128mem_mask, "vV8s*V2LLiUc", "n", "avx512vl")
TARGET_BUILTIN(__builtin_ia32_pmovqw256_mask, "V8sV4LLiV8sUc", "nc", "avx512vl")
TARGET_BUILTIN(__builtin_ia32_pmovqw256mem_mask, "vV8s*V4LLiUc", "n", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_extractf32x8, "V8fV16fIi", "nc", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_extractf64x2_512, "V2dV8dIi", "nc", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_extracti32x8, "V8iV16iIi", "nc", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_extracti64x2_512, "V2LLiV8LLiIi", "nc", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_extracti32x4, "V4iV16iIi", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_extracti64x4, "V4LLiV8LLiIi", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_extractf64x2_256, "V2dV4dIi", "nc", "avx512dq,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_extracti64x2_256, "V2LLiV4LLiIi", "nc", "avx512dq,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_extractf32x4_256, "V4fV8fIi", "nc", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_extracti32x4_256, "V4iV8iIi", "nc", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_insertf32x8, "V16fV16fV8fIi", "nc", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_insertf64x2_512, "V8dV8dV2dIi", "nc", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_inserti32x8, "V16iV16iV8iIi", "nc", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_inserti64x2_512, "V8LLiV8LLiV2LLiIi", "nc", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_insertf64x4, "V8dV8dV4dIi", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_inserti64x4, "V8LLiV8LLiV4LLiIi", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_insertf64x2_256, "V4dV4dV2dIi", "nc", "avx512dq,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_inserti64x2_256, "V4LLiV4LLiV2LLiIi", "nc", "avx512dq,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_insertf32x4_256, "V8fV8fV4fIi", "nc", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_inserti32x4_256, "V8iV8iV4iIi", "nc", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_insertf32x4, "V16fV16fV4fIi", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_inserti32x4, "V16iV16iV4iIi", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_getmantpd128_mask, "V2dV2diV2dUc", "nc", "avx512vl")
TARGET_BUILTIN(__builtin_ia32_getmantpd256_mask, "V4dV4diV4dUc", "nc", "avx512vl")
TARGET_BUILTIN(__builtin_ia32_getmantps128_mask, "V4fV4fiV4fUc", "nc", "avx512vl")
Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy);
return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
}
+ case X86::BI__builtin_ia32_vextractf128_pd256:
+ case X86::BI__builtin_ia32_vextractf128_ps256:
+ case X86::BI__builtin_ia32_vextractf128_si256:
+ case X86::BI__builtin_ia32_extract128i256:
+ case X86::BI__builtin_ia32_extractf64x4:
+ case X86::BI__builtin_ia32_extractf32x4:
+ case X86::BI__builtin_ia32_extracti64x4:
+ case X86::BI__builtin_ia32_extracti32x4:
+ case X86::BI__builtin_ia32_extractf32x8:
+ case X86::BI__builtin_ia32_extracti32x8:
+ case X86::BI__builtin_ia32_extractf32x4_256:
+ case X86::BI__builtin_ia32_extracti32x4_256:
+ case X86::BI__builtin_ia32_extractf64x2_256:
+ case X86::BI__builtin_ia32_extracti64x2_256:
+ case X86::BI__builtin_ia32_extractf64x2_512:
+ case X86::BI__builtin_ia32_extracti64x2_512: {
+ llvm::Type *DstTy = ConvertType(E->getType());
+ unsigned NumElts = DstTy->getVectorNumElements();
+ unsigned Index = cast<ConstantInt>(Ops[1])->getZExtValue() * NumElts;
+
+ uint32_t Indices[16];
+ for (unsigned i = 0; i != NumElts; ++i)
+ Indices[i] = i + Index;
+
+ return Builder.CreateShuffleVector(Ops[0],
+ UndefValue::get(Ops[0]->getType()),
+ makeArrayRef(Indices, NumElts),
+ "extract");
+ }
+ case X86::BI__builtin_ia32_vinsertf128_pd256:
+ case X86::BI__builtin_ia32_vinsertf128_ps256:
+ case X86::BI__builtin_ia32_vinsertf128_si256:
+ case X86::BI__builtin_ia32_insert128i256:
+ case X86::BI__builtin_ia32_insertf64x4:
+ case X86::BI__builtin_ia32_insertf32x4:
+ case X86::BI__builtin_ia32_inserti64x4:
+ case X86::BI__builtin_ia32_inserti32x4:
+ case X86::BI__builtin_ia32_insertf32x8:
+ case X86::BI__builtin_ia32_inserti32x8:
+ case X86::BI__builtin_ia32_insertf32x4_256:
+ case X86::BI__builtin_ia32_inserti32x4_256:
+ case X86::BI__builtin_ia32_insertf64x2_256:
+ case X86::BI__builtin_ia32_inserti64x2_256:
+ case X86::BI__builtin_ia32_insertf64x2_512:
+ case X86::BI__builtin_ia32_inserti64x2_512: {
+ unsigned DstNumElts = Ops[0]->getType()->getVectorNumElements();
+ unsigned SrcNumElts = Ops[1]->getType()->getVectorNumElements();
+ unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue() * SrcNumElts;
+
+ uint32_t Indices[16];
+ for (unsigned i = 0; i != DstNumElts; ++i)
+ Indices[i] = (i >= SrcNumElts) ? SrcNumElts + (i % SrcNumElts) : i;
+
+ Value *Op1 = Builder.CreateShuffleVector(Ops[1],
+ UndefValue::get(Ops[1]->getType()),
+ makeArrayRef(Indices, DstNumElts),
+ "widen");
+
+ for (unsigned i = 0; i != DstNumElts; ++i) {
+ if (i >= Index && i < (Index + SrcNumElts))
+ Indices[i] = (i - Index) + DstNumElts;
+ else
+ Indices[i] = i;
+ }
+
+ return Builder.CreateShuffleVector(Ops[0], Op1,
+ makeArrayRef(Indices, DstNumElts),
+ "insert");
+ }
case X86::BI__builtin_ia32_pblendw128:
case X86::BI__builtin_ia32_blendpd:
case X86::BI__builtin_ia32_blendps:
(__m256i)__builtin_ia32_permti256((__m256i)(V1), (__m256i)(V2), (M))
#define _mm256_extracti128_si256(V, M) \
- (__m128i)__builtin_shufflevector((__v4di)(__m256i)(V), \
- (__v4di)_mm256_undefined_si256(), \
- (((M) & 1) ? 2 : 0), \
- (((M) & 1) ? 3 : 1) )
+ (__m128i)__builtin_ia32_extract128i256((__v4di)(__m256i)(V), (int)(M))
#define _mm256_inserti128_si256(V1, V2, M) \
- (__m256i)__builtin_shufflevector((__v4di)(__m256i)(V1), \
- (__v4di)_mm256_castsi128_si256((__m128i)(V2)), \
- (((M) & 1) ? 0 : 4), \
- (((M) & 1) ? 1 : 5), \
- (((M) & 1) ? 4 : 2), \
- (((M) & 1) ? 5 : 3) )
+ (__m256i)__builtin_ia32_insert128i256((__v4di)(__m256i)(V1), \
+ (__v2di)(__m128i)(V2), (int)(M))
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskload_epi32(int const *__X, __m256i __M)
}
#define _mm512_extractf32x8_ps(A, imm) \
- (__m256)__builtin_shufflevector((__v16sf)(__m512)(A), \
- (__v16sf)_mm512_undefined_ps(), \
- ((imm) & 1) ? 8 : 0, \
- ((imm) & 1) ? 9 : 1, \
- ((imm) & 1) ? 10 : 2, \
- ((imm) & 1) ? 11 : 3, \
- ((imm) & 1) ? 12 : 4, \
- ((imm) & 1) ? 13 : 5, \
- ((imm) & 1) ? 14 : 6, \
- ((imm) & 1) ? 15 : 7)
+ (__m256)__builtin_ia32_extractf32x8((__v16sf)(__m512)(A), (int)(imm))
#define _mm512_mask_extractf32x8_ps(W, U, A, imm) \
(__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
(__v8sf)_mm256_setzero_ps())
#define _mm512_extractf64x2_pd(A, imm) \
- (__m128d)__builtin_shufflevector((__v8df)(__m512d)(A), \
- (__v8df)_mm512_undefined_pd(), \
- 0 + ((imm) & 0x3) * 2, \
- 1 + ((imm) & 0x3) * 2)
+ (__m128d)__builtin_ia32_extractf64x2_512((__v8df)(__m512d)(A), (int)(imm))
#define _mm512_mask_extractf64x2_pd(W, U, A, imm) \
(__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
(__v2df)_mm_setzero_pd())
#define _mm512_extracti32x8_epi32(A, imm) \
- (__m256i)__builtin_shufflevector((__v16si)(__m512i)(A), \
- (__v16si)_mm512_undefined_epi32(), \
- ((imm) & 1) ? 8 : 0, \
- ((imm) & 1) ? 9 : 1, \
- ((imm) & 1) ? 10 : 2, \
- ((imm) & 1) ? 11 : 3, \
- ((imm) & 1) ? 12 : 4, \
- ((imm) & 1) ? 13 : 5, \
- ((imm) & 1) ? 14 : 6, \
- ((imm) & 1) ? 15 : 7)
+ (__m256i)__builtin_ia32_extracti32x8((__v16si)(__m512i)(A), (int)(imm))
#define _mm512_mask_extracti32x8_epi32(W, U, A, imm) \
(__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
(__v8si)_mm256_setzero_si256())
#define _mm512_extracti64x2_epi64(A, imm) \
- (__m128i)__builtin_shufflevector((__v8di)(__m512i)(A), \
- (__v8di)_mm512_undefined_epi32(), \
- 0 + ((imm) & 0x3) * 2, \
- 1 + ((imm) & 0x3) * 2)
+ (__m128i)__builtin_ia32_extracti64x2_512((__v8di)(__m512i)(A), (int)(imm))
#define _mm512_mask_extracti64x2_epi64(W, U, A, imm) \
(__m128d)__builtin_ia32_selectq_128((__mmask8)(U), \
(__v2di)_mm_setzero_si128())
#define _mm512_insertf32x8(A, B, imm) \
- (__m512)__builtin_shufflevector((__v16sf)(__m512)(A), \
- (__v16sf)_mm512_castps256_ps512((__m256)(B)),\
- ((imm) & 0x1) ? 0 : 16, \
- ((imm) & 0x1) ? 1 : 17, \
- ((imm) & 0x1) ? 2 : 18, \
- ((imm) & 0x1) ? 3 : 19, \
- ((imm) & 0x1) ? 4 : 20, \
- ((imm) & 0x1) ? 5 : 21, \
- ((imm) & 0x1) ? 6 : 22, \
- ((imm) & 0x1) ? 7 : 23, \
- ((imm) & 0x1) ? 16 : 8, \
- ((imm) & 0x1) ? 17 : 9, \
- ((imm) & 0x1) ? 18 : 10, \
- ((imm) & 0x1) ? 19 : 11, \
- ((imm) & 0x1) ? 20 : 12, \
- ((imm) & 0x1) ? 21 : 13, \
- ((imm) & 0x1) ? 22 : 14, \
- ((imm) & 0x1) ? 23 : 15)
+ (__m512)__builtin_ia32_insertf32x8((__v16sf)(__m512)(A), \
+ (__v8sf)(__m256)(B), (int)(imm))
#define _mm512_mask_insertf32x8(W, U, A, B, imm) \
(__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
(__v16sf)_mm512_setzero_ps())
#define _mm512_insertf64x2(A, B, imm) \
- (__m512d)__builtin_shufflevector((__v8df)(__m512d)(A), \
- (__v8df)_mm512_castpd128_pd512((__m128d)(B)),\
- (((imm) & 0x3) == 0) ? 8 : 0, \
- (((imm) & 0x3) == 0) ? 9 : 1, \
- (((imm) & 0x3) == 1) ? 8 : 2, \
- (((imm) & 0x3) == 1) ? 9 : 3, \
- (((imm) & 0x3) == 2) ? 8 : 4, \
- (((imm) & 0x3) == 2) ? 9 : 5, \
- (((imm) & 0x3) == 3) ? 8 : 6, \
- (((imm) & 0x3) == 3) ? 9 : 7)
+ (__m512d)__builtin_ia32_insertf64x2_512((__v8df)(__m512d)(A), \
+ (__v2df)(__m128d)(B), (int)(imm))
#define _mm512_mask_insertf64x2(W, U, A, B, imm) \
(__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
(__v8df)_mm512_setzero_pd())
#define _mm512_inserti32x8(A, B, imm) \
- (__m512i)__builtin_shufflevector((__v16si)(__m512i)(A), \
- (__v16si)_mm512_castsi256_si512((__m256i)(B)),\
- ((imm) & 0x1) ? 0 : 16, \
- ((imm) & 0x1) ? 1 : 17, \
- ((imm) & 0x1) ? 2 : 18, \
- ((imm) & 0x1) ? 3 : 19, \
- ((imm) & 0x1) ? 4 : 20, \
- ((imm) & 0x1) ? 5 : 21, \
- ((imm) & 0x1) ? 6 : 22, \
- ((imm) & 0x1) ? 7 : 23, \
- ((imm) & 0x1) ? 16 : 8, \
- ((imm) & 0x1) ? 17 : 9, \
- ((imm) & 0x1) ? 18 : 10, \
- ((imm) & 0x1) ? 19 : 11, \
- ((imm) & 0x1) ? 20 : 12, \
- ((imm) & 0x1) ? 21 : 13, \
- ((imm) & 0x1) ? 22 : 14, \
- ((imm) & 0x1) ? 23 : 15)
+ (__m512i)__builtin_ia32_inserti32x8((__v16si)(__m512i)(A), \
+ (__v8si)(__m256i)(B), (int)(imm))
#define _mm512_mask_inserti32x8(W, U, A, B, imm) \
(__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
(__v16si)_mm512_setzero_si512())
#define _mm512_inserti64x2(A, B, imm) \
- (__m512i)__builtin_shufflevector((__v8di)(__m512i)(A), \
- (__v8di)_mm512_castsi128_si512((__m128i)(B)),\
- (((imm) & 0x3) == 0) ? 8 : 0, \
- (((imm) & 0x3) == 0) ? 9 : 1, \
- (((imm) & 0x3) == 1) ? 8 : 2, \
- (((imm) & 0x3) == 1) ? 9 : 3, \
- (((imm) & 0x3) == 2) ? 8 : 4, \
- (((imm) & 0x3) == 2) ? 9 : 5, \
- (((imm) & 0x3) == 3) ? 8 : 6, \
- (((imm) & 0x3) == 3) ? 9 : 7)
+ (__m512i)__builtin_ia32_inserti64x2_512((__v8di)(__m512i)(A), \
+ (__v2di)(__m128i)(B), (int)(imm))
#define _mm512_mask_inserti64x2(W, U, A, B, imm) \
(__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
/* Vector Extract */
#define _mm512_extractf64x4_pd(A, I) \
- (__m256d)__builtin_shufflevector((__v8df)(__m512d)(A), \
- (__v8df)_mm512_undefined_pd(), \
- ((I) & 1) ? 4 : 0, \
- ((I) & 1) ? 5 : 1, \
- ((I) & 1) ? 6 : 2, \
- ((I) & 1) ? 7 : 3)
+ (__m256d)__builtin_ia32_extractf64x4((__v8df)(__m512d)(A), (int)(I))
#define _mm512_mask_extractf64x4_pd(W, U, A, imm) \
(__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
(__v4df)_mm256_setzero_pd())
#define _mm512_extractf32x4_ps(A, I) \
- (__m128)__builtin_shufflevector((__v16sf)(__m512)(A), \
- (__v16sf)_mm512_undefined_ps(), \
- 0 + ((I) & 0x3) * 4, \
- 1 + ((I) & 0x3) * 4, \
- 2 + ((I) & 0x3) * 4, \
- 3 + ((I) & 0x3) * 4)
+ (__m128)__builtin_ia32_extractf32x4((__v16sf)(__m512)(A), (int)(I))
#define _mm512_mask_extractf32x4_ps(W, U, A, imm) \
(__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
}
#define _mm512_extracti32x4_epi32(A, imm) \
- (__m128i)__builtin_shufflevector((__v16si)(__m512i)(A), \
- (__v16si)_mm512_undefined_epi32(), \
- 0 + ((imm) & 0x3) * 4, \
- 1 + ((imm) & 0x3) * 4, \
- 2 + ((imm) & 0x3) * 4, \
- 3 + ((imm) & 0x3) * 4)
+ (__m128i)__builtin_ia32_extracti32x4((__v16si)(__m512i)(A), (int)(imm))
#define _mm512_mask_extracti32x4_epi32(W, U, A, imm) \
(__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
(__v4si)_mm_setzero_si128())
#define _mm512_extracti64x4_epi64(A, imm) \
- (__m256i)__builtin_shufflevector((__v8di)(__m512i)(A), \
- (__v8di)_mm512_undefined_epi32(), \
- ((imm) & 1) ? 4 : 0, \
- ((imm) & 1) ? 5 : 1, \
- ((imm) & 1) ? 6 : 2, \
- ((imm) & 1) ? 7 : 3)
+ (__m256i)__builtin_ia32_extracti64x4((__v8di)(__m512i)(A), (int)(imm))
#define _mm512_mask_extracti64x4_epi64(W, U, A, imm) \
(__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
(__v4di)_mm256_setzero_si256())
#define _mm512_insertf64x4(A, B, imm) \
- (__m512d)__builtin_shufflevector((__v8df)(__m512d)(A), \
- (__v8df)_mm512_castpd256_pd512((__m256d)(B)), \
- ((imm) & 0x1) ? 0 : 8, \
- ((imm) & 0x1) ? 1 : 9, \
- ((imm) & 0x1) ? 2 : 10, \
- ((imm) & 0x1) ? 3 : 11, \
- ((imm) & 0x1) ? 8 : 4, \
- ((imm) & 0x1) ? 9 : 5, \
- ((imm) & 0x1) ? 10 : 6, \
- ((imm) & 0x1) ? 11 : 7)
+ (__m512d)__builtin_ia32_insertf64x4((__v8df)(__m512d)(A), \
+ (__v4df)(__m256d)(B), (int)(imm))
#define _mm512_mask_insertf64x4(W, U, A, B, imm) \
(__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
(__v8df)_mm512_setzero_pd())
#define _mm512_inserti64x4(A, B, imm) \
- (__m512i)__builtin_shufflevector((__v8di)(__m512i)(A), \
- (__v8di)_mm512_castsi256_si512((__m256i)(B)), \
- ((imm) & 0x1) ? 0 : 8, \
- ((imm) & 0x1) ? 1 : 9, \
- ((imm) & 0x1) ? 2 : 10, \
- ((imm) & 0x1) ? 3 : 11, \
- ((imm) & 0x1) ? 8 : 4, \
- ((imm) & 0x1) ? 9 : 5, \
- ((imm) & 0x1) ? 10 : 6, \
- ((imm) & 0x1) ? 11 : 7)
+ (__m512i)__builtin_ia32_inserti64x4((__v8di)(__m512i)(A), \
+ (__v4di)(__m256i)(B), (int)(imm))
#define _mm512_mask_inserti64x4(W, U, A, B, imm) \
(__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
(__v8di)_mm512_setzero_si512())
#define _mm512_insertf32x4(A, B, imm) \
- (__m512)__builtin_shufflevector((__v16sf)(__m512)(A), \
- (__v16sf)_mm512_castps128_ps512((__m128)(B)),\
- (((imm) & 0x3) == 0) ? 16 : 0, \
- (((imm) & 0x3) == 0) ? 17 : 1, \
- (((imm) & 0x3) == 0) ? 18 : 2, \
- (((imm) & 0x3) == 0) ? 19 : 3, \
- (((imm) & 0x3) == 1) ? 16 : 4, \
- (((imm) & 0x3) == 1) ? 17 : 5, \
- (((imm) & 0x3) == 1) ? 18 : 6, \
- (((imm) & 0x3) == 1) ? 19 : 7, \
- (((imm) & 0x3) == 2) ? 16 : 8, \
- (((imm) & 0x3) == 2) ? 17 : 9, \
- (((imm) & 0x3) == 2) ? 18 : 10, \
- (((imm) & 0x3) == 2) ? 19 : 11, \
- (((imm) & 0x3) == 3) ? 16 : 12, \
- (((imm) & 0x3) == 3) ? 17 : 13, \
- (((imm) & 0x3) == 3) ? 18 : 14, \
- (((imm) & 0x3) == 3) ? 19 : 15)
+ (__m512)__builtin_ia32_insertf32x4((__v16sf)(__m512)(A), \
+ (__v4sf)(__m128)(B), (int)(imm))
#define _mm512_mask_insertf32x4(W, U, A, B, imm) \
(__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
(__v16sf)_mm512_setzero_ps())
#define _mm512_inserti32x4(A, B, imm) \
- (__m512i)__builtin_shufflevector((__v16si)(__m512i)(A), \
- (__v16si)_mm512_castsi128_si512((__m128i)(B)),\
- (((imm) & 0x3) == 0) ? 16 : 0, \
- (((imm) & 0x3) == 0) ? 17 : 1, \
- (((imm) & 0x3) == 0) ? 18 : 2, \
- (((imm) & 0x3) == 0) ? 19 : 3, \
- (((imm) & 0x3) == 1) ? 16 : 4, \
- (((imm) & 0x3) == 1) ? 17 : 5, \
- (((imm) & 0x3) == 1) ? 18 : 6, \
- (((imm) & 0x3) == 1) ? 19 : 7, \
- (((imm) & 0x3) == 2) ? 16 : 8, \
- (((imm) & 0x3) == 2) ? 17 : 9, \
- (((imm) & 0x3) == 2) ? 18 : 10, \
- (((imm) & 0x3) == 2) ? 19 : 11, \
- (((imm) & 0x3) == 3) ? 16 : 12, \
- (((imm) & 0x3) == 3) ? 17 : 13, \
- (((imm) & 0x3) == 3) ? 18 : 14, \
- (((imm) & 0x3) == 3) ? 19 : 15)
+ (__m512i)__builtin_ia32_inserti32x4((__v16si)(__m512i)(A), \
+ (__v4si)(__m128i)(B), (int)(imm))
#define _mm512_mask_inserti32x4(W, U, A, B, imm) \
(__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
}
#define _mm256_extractf64x2_pd(A, imm) \
- (__m128d)__builtin_shufflevector((__v4df)(__m256d)(A), \
- (__v4df)_mm256_undefined_pd(), \
- ((imm) & 1) ? 2 : 0, \
- ((imm) & 1) ? 3 : 1)
+ (__m128d)__builtin_ia32_extractf64x2_256((__v4df)(__m256d)(A), (int)(imm))
#define _mm256_mask_extractf64x2_pd(W, U, A, imm) \
(__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
(__v2df)_mm_setzero_pd())
#define _mm256_extracti64x2_epi64(A, imm) \
- (__m128i)__builtin_shufflevector((__v4di)(__m256i)(A), \
- (__v4di)_mm256_undefined_si256(), \
- ((imm) & 1) ? 2 : 0, \
- ((imm) & 1) ? 3 : 1)
+ (__m128i)__builtin_ia32_extracti64x2_256((__v4di)(__m256i)(A), (int)(imm))
#define _mm256_mask_extracti64x2_epi64(W, U, A, imm) \
(__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
(__v2di)_mm_setzero_si128())
#define _mm256_insertf64x2(A, B, imm) \
- (__m256d)__builtin_shufflevector((__v4df)(__m256d)(A), \
- (__v4df)_mm256_castpd128_pd256((__m128d)(B)), \
- ((imm) & 0x1) ? 0 : 4, \
- ((imm) & 0x1) ? 1 : 5, \
- ((imm) & 0x1) ? 4 : 2, \
- ((imm) & 0x1) ? 5 : 3)
+ (__m256d)__builtin_ia32_insertf64x2_256((__v4df)(__m256d)(A), \
+ (__v2df)(__m128d)(B), (int)(imm))
#define _mm256_mask_insertf64x2(W, U, A, B, imm) \
(__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
(__v4df)_mm256_setzero_pd())
#define _mm256_inserti64x2(A, B, imm) \
- (__m256i)__builtin_shufflevector((__v4di)(__m256i)(A), \
- (__v4di)_mm256_castsi128_si256((__m128i)(B)), \
- ((imm) & 0x1) ? 0 : 4, \
- ((imm) & 0x1) ? 1 : 5, \
- ((imm) & 0x1) ? 4 : 2, \
- ((imm) & 0x1) ? 5 : 3)
+ (__m256i)__builtin_ia32_inserti64x2_256((__v4di)(__m256i)(A), \
+ (__v2di)(__m128i)(B), (int)(imm))
#define _mm256_mask_inserti64x2(W, U, A, B, imm) \
(__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
}
#define _mm256_extractf32x4_ps(A, imm) \
- (__m128)__builtin_shufflevector((__v8sf)(__m256)(A), \
- (__v8sf)_mm256_undefined_ps(), \
- ((imm) & 1) ? 4 : 0, \
- ((imm) & 1) ? 5 : 1, \
- ((imm) & 1) ? 6 : 2, \
- ((imm) & 1) ? 7 : 3)
+ (__m128)__builtin_ia32_extractf32x4_256((__v8sf)(__m256)(A), (int)(imm))
#define _mm256_mask_extractf32x4_ps(W, U, A, imm) \
(__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
(__v4sf)_mm_setzero_ps())
#define _mm256_extracti32x4_epi32(A, imm) \
- (__m128i)__builtin_shufflevector((__v8si)(__m256)(A), \
- (__v8si)_mm256_undefined_si256(), \
- ((imm) & 1) ? 4 : 0, \
- ((imm) & 1) ? 5 : 1, \
- ((imm) & 1) ? 6 : 2, \
- ((imm) & 1) ? 7 : 3)
+ (__m128i)__builtin_ia32_extracti32x4_256((__v8si)(__m256i)(A), (int)(imm))
#define _mm256_mask_extracti32x4_epi32(W, U, A, imm) \
(__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
(__v4si)_mm_setzero_si128())
#define _mm256_insertf32x4(A, B, imm) \
- (__m256)__builtin_shufflevector((__v8sf)(__m256)(A), \
- (__v8sf)_mm256_castps128_ps256((__m128)(B)), \
- ((imm) & 0x1) ? 0 : 8, \
- ((imm) & 0x1) ? 1 : 9, \
- ((imm) & 0x1) ? 2 : 10, \
- ((imm) & 0x1) ? 3 : 11, \
- ((imm) & 0x1) ? 8 : 4, \
- ((imm) & 0x1) ? 9 : 5, \
- ((imm) & 0x1) ? 10 : 6, \
- ((imm) & 0x1) ? 11 : 7)
+ (__m256)__builtin_ia32_insertf32x4_256((__v8sf)(__m256)(A), \
+ (__v4sf)(__m128)(B), (int)(imm))
#define _mm256_mask_insertf32x4(W, U, A, B, imm) \
(__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
(__v8sf)_mm256_setzero_ps())
#define _mm256_inserti32x4(A, B, imm) \
- (__m256i)__builtin_shufflevector((__v8si)(__m256i)(A), \
- (__v8si)_mm256_castsi128_si256((__m128i)(B)), \
- ((imm) & 0x1) ? 0 : 8, \
- ((imm) & 0x1) ? 1 : 9, \
- ((imm) & 0x1) ? 2 : 10, \
- ((imm) & 0x1) ? 3 : 11, \
- ((imm) & 0x1) ? 8 : 4, \
- ((imm) & 0x1) ? 9 : 5, \
- ((imm) & 0x1) ? 10 : 6, \
- ((imm) & 0x1) ? 11 : 7)
+ (__m256i)__builtin_ia32_inserti32x4_256((__v8si)(__m256i)(A), \
+ (__v4si)(__m128i)(B), (int)(imm))
#define _mm256_mask_inserti32x4(W, U, A, B, imm) \
(__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
/// result.
/// \returns A 256-bit vector of [8 x float] containing the interleaved values.
#define _mm256_insertf128_ps(V1, V2, M) \
- (__m256)__builtin_shufflevector( \
- (__v8sf)(__m256)(V1), \
- (__v8sf)_mm256_castps128_ps256((__m128)(V2)), \
- (((M) & 1) ? 0 : 8), \
- (((M) & 1) ? 1 : 9), \
- (((M) & 1) ? 2 : 10), \
- (((M) & 1) ? 3 : 11), \
- (((M) & 1) ? 8 : 4), \
- (((M) & 1) ? 9 : 5), \
- (((M) & 1) ? 10 : 6), \
- (((M) & 1) ? 11 : 7) )
+ (__m256)__builtin_ia32_vinsertf128_ps256((__v8sf)(__m256)(V1), \
+ (__v4sf)(__m128)(V2), (int)(M))
/// Constructs a new 256-bit vector of [4 x double] by first duplicating
/// a 256-bit vector of [4 x double] given in the first parameter, and then
/// result.
/// \returns A 256-bit vector of [4 x double] containing the interleaved values.
#define _mm256_insertf128_pd(V1, V2, M) \
- (__m256d)__builtin_shufflevector( \
- (__v4df)(__m256d)(V1), \
- (__v4df)_mm256_castpd128_pd256((__m128d)(V2)), \
- (((M) & 1) ? 0 : 4), \
- (((M) & 1) ? 1 : 5), \
- (((M) & 1) ? 4 : 2), \
- (((M) & 1) ? 5 : 3) )
+ (__m256d)__builtin_ia32_vinsertf128_pd256((__v4df)(__m256d)(V1), \
+ (__v2df)(__m128d)(V2), (int)(M))
/// Constructs a new 256-bit integer vector by first duplicating a
/// 256-bit integer vector given in the first parameter, and then replacing
/// result.
/// \returns A 256-bit integer vector containing the interleaved values.
#define _mm256_insertf128_si256(V1, V2, M) \
- (__m256i)__builtin_shufflevector( \
- (__v4di)(__m256i)(V1), \
- (__v4di)_mm256_castsi128_si256((__m128i)(V2)), \
- (((M) & 1) ? 0 : 4), \
- (((M) & 1) ? 1 : 5), \
- (((M) & 1) ? 4 : 2), \
- (((M) & 1) ? 5 : 3) )
+ (__m256i)__builtin_ia32_vinsertf128_si256((__v4di)(__m256i)(V1), \
+ (__v2di)(__m128i)(V2), (int)(M))
/*
Vector extract.
/// If bit [0] of \a M is 1, bits [255:128] of \a V are copied to the result.
/// \returns A 128-bit vector of [4 x float] containing the extracted bits.
#define _mm256_extractf128_ps(V, M) \
- (__m128)__builtin_shufflevector( \
- (__v8sf)(__m256)(V), \
- (__v8sf)(_mm256_undefined_ps()), \
- (((M) & 1) ? 4 : 0), \
- (((M) & 1) ? 5 : 1), \
- (((M) & 1) ? 6 : 2), \
- (((M) & 1) ? 7 : 3) )
+ (__m128)__builtin_ia32_vextractf128_ps256((__v8sf)(__m256)(V), (int)(M))
/// Extracts either the upper or the lower 128 bits from a 256-bit vector
/// of [4 x double], as determined by the immediate integer parameter, and
/// If bit [0] of \a M is 1, bits [255:128] of \a V are copied to the result.
/// \returns A 128-bit vector of [2 x double] containing the extracted bits.
#define _mm256_extractf128_pd(V, M) \
- (__m128d)__builtin_shufflevector( \
- (__v4df)(__m256d)(V), \
- (__v4df)(_mm256_undefined_pd()), \
- (((M) & 1) ? 2 : 0), \
- (((M) & 1) ? 3 : 1) )
+ (__m128d)__builtin_ia32_vextractf128_pd256((__v4df)(__m256d)(V), (int)(M))
/// Extracts either the upper or the lower 128 bits from a 256-bit
/// integer vector, as determined by the immediate integer parameter, and
/// If bit [0] of \a M is 1, bits [255:128] of \a V are copied to the result.
/// \returns A 128-bit integer vector containing the extracted bits.
#define _mm256_extractf128_si256(V, M) \
- (__m128i)__builtin_shufflevector( \
- (__v4di)(__m256i)(V), \
- (__v4di)(_mm256_undefined_si256()), \
- (((M) & 1) ? 2 : 0), \
- (((M) & 1) ? 3 : 1) )
+ (__m128i)__builtin_ia32_vextractf128_si256((__v4di)(__m256i)(V), (int)(M))
/* SIMD load ops (unaligned) */
/// Loads two 128-bit floating-point vectors of [4 x float] from
return false;
case X86::BI__builtin_ia32_vec_ext_v2si:
case X86::BI__builtin_ia32_vec_ext_v2di:
+ case X86::BI__builtin_ia32_vextractf128_pd256:
+ case X86::BI__builtin_ia32_vextractf128_ps256:
+ case X86::BI__builtin_ia32_vextractf128_si256:
+ case X86::BI__builtin_ia32_extract128i256:
+ case X86::BI__builtin_ia32_extractf64x4:
+ case X86::BI__builtin_ia32_extracti64x4:
+ case X86::BI__builtin_ia32_extractf32x8:
+ case X86::BI__builtin_ia32_extracti32x8:
+ case X86::BI__builtin_ia32_extractf64x2_256:
+ case X86::BI__builtin_ia32_extracti64x2_256:
+ case X86::BI__builtin_ia32_extractf32x4_256:
+ case X86::BI__builtin_ia32_extracti32x4_256:
i = 1; l = 0; u = 1;
break;
case X86::BI__builtin_ia32_vec_set_v2di:
+ case X86::BI__builtin_ia32_vinsertf128_pd256:
+ case X86::BI__builtin_ia32_vinsertf128_ps256:
+ case X86::BI__builtin_ia32_vinsertf128_si256:
+ case X86::BI__builtin_ia32_insert128i256:
+ case X86::BI__builtin_ia32_insertf32x8:
+ case X86::BI__builtin_ia32_inserti32x8:
+ case X86::BI__builtin_ia32_insertf64x4:
+ case X86::BI__builtin_ia32_inserti64x4:
+ case X86::BI__builtin_ia32_insertf64x2_256:
+ case X86::BI__builtin_ia32_inserti64x2_256:
+ case X86::BI__builtin_ia32_insertf32x4_256:
+ case X86::BI__builtin_ia32_inserti32x4_256:
i = 2; l = 0; u = 1;
break;
case X86::BI__builtin_ia32_vpermilpd:
case X86::BI__builtin_ia32_vec_ext_v4si:
case X86::BI__builtin_ia32_vec_ext_v4sf:
case X86::BI__builtin_ia32_vec_ext_v4di:
+ case X86::BI__builtin_ia32_extractf32x4:
+ case X86::BI__builtin_ia32_extracti32x4:
+ case X86::BI__builtin_ia32_extractf64x2_512:
+ case X86::BI__builtin_ia32_extracti64x2_512:
i = 1; l = 0; u = 3;
break;
case X86::BI_mm_prefetch:
case X86::BI__builtin_ia32_shuf_f64x2_256:
case X86::BI__builtin_ia32_shuf_i32x4_256:
case X86::BI__builtin_ia32_shuf_i64x2_256:
+ case X86::BI__builtin_ia32_insertf64x2_512:
+ case X86::BI__builtin_ia32_inserti64x2_512:
+ case X86::BI__builtin_ia32_insertf32x4:
+ case X86::BI__builtin_ia32_inserti32x4:
i = 2; l = 0; u = 3;
break;
case X86::BI__builtin_ia32_vpermil2pd:
__m128d test_mm256_extractf128_pd(__m256d A) {
// CHECK-LABEL: test_mm256_extractf128_pd
- // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> zeroinitializer, <2 x i32> <i32 2, i32 3>
+ // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> undef, <2 x i32> <i32 2, i32 3>
return _mm256_extractf128_pd(A, 1);
}
__m128 test_mm256_extractf128_ps(__m256 A) {
// CHECK-LABEL: test_mm256_extractf128_ps
- // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> zeroinitializer, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
return _mm256_extractf128_ps(A, 1);
}
__m128i test_mm256_extractf128_si256(__m256i A) {
// CHECK-LABEL: test_mm256_extractf128_si256
- // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> zeroinitializer, <2 x i32> <i32 2, i32 3>
+ // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
return _mm256_extractf128_si256(A, 1);
}
__m256d test_mm256_insertf128_pd(__m256d A, __m128d B) {
// CHECK-LABEL: test_mm256_insertf128_pd
- // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+ // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
return _mm256_insertf128_pd(A, B, 0);
}
__m256 test_mm256_insertf128_ps(__m256 A, __m128 B) {
// CHECK-LABEL: test_mm256_insertf128_ps
- // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+ // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
return _mm256_insertf128_ps(A, B, 1);
}
__m256i test_mm256_insertf128_si256(__m256i A, __m128i B) {
// CHECK-LABEL: test_mm256_insertf128_si256
- // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
- // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+ // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
return _mm256_insertf128_si256(A, B, 0);
}
// CHECK: load <4 x float>, <4 x float>* %{{.*}}, align 1{{$}}
// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
// CHECK: load <4 x float>, <4 x float>* %{{.*}}, align 1{{$}}
- // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+ // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
return _mm256_loadu2_m128(A, B);
}
// CHECK: load <2 x double>, <2 x double>* %{{.*}}, align 1{{$}}
// CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
// CHECK: load <2 x double>, <2 x double>* %{{.*}}, align 1{{$}}
- // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+ // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
return _mm256_loadu2_m128d(A, B);
}
// CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 1{{$}}
// CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
// CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 1{{$}}
- // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
- // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+ // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
return _mm256_loadu2_m128i(A, B);
}
// CHECK-LABEL: test_mm256_storeu2_m128
// CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: store <4 x float> %{{.*}}, <4 x float>* %{{.*}}, align 1{{$}}
- // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> zeroinitializer, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: store <4 x float> %{{.*}}, <4 x float>* %{{.*}}, align 1{{$}}
_mm256_storeu2_m128(A, B, C);
}
// CHECK-LABEL: test_mm256_storeu2_m128d
// CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <2 x i32> <i32 0, i32 1>
// CHECK: store <2 x double> %{{.*}}, <2 x double>* %{{.*}}, align 1{{$}}
- // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> zeroinitializer, <2 x i32> <i32 2, i32 3>
+ // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> undef, <2 x i32> <i32 2, i32 3>
// CHECK: store <2 x double> %{{.*}}, <2 x double>* %{{.*}}, align 1{{$}}
_mm256_storeu2_m128d(A, B, C);
}
// CHECK-LABEL: test_mm256_storeu2_m128i
// CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <2 x i32> <i32 0, i32 1>
// CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 1{{$}}
- // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> zeroinitializer, <2 x i32> <i32 2, i32 3>
+ // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 1{{$}}
_mm256_storeu2_m128i(A, B, C);
}
__m256i test_mm256_insertf128_si256_0(__m256i a, __m128i b) {
// CHECK-LABEL: @test_mm256_insertf128_si256_0
- // CHECK: shufflevector{{.*}}<i32 4, i32 5, i32 2, i32 3>
+ // CHECK: shufflevector{{.*}}<i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
return _mm256_insertf128_si256(a, b, 0);
}
__m256i test_mm256_insertf128_si256_1(__m256i a, __m128i b) {
// CHECK-LABEL: @test_mm256_insertf128_si256_1
- // CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 4, i32 5>
+ // CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
return _mm256_insertf128_si256(a, b, 1);
}
__m128i test_mm256_extractf128_si256_0(__m256i a) {
// CHECK-LABEL: @test_mm256_extractf128_si256_0
- // CHECK: shufflevector{{.*}}<i32 0, i32 1>
+ // CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 2, i32 3>
return _mm256_extractf128_si256(a, 0);
}
__m128i test_mm256_extractf128_si256_1(__m256i a) {
// CHECK-LABEL: @test_mm256_extractf128_si256_1
- // CHECK: shufflevector{{.*}}<i32 2, i32 3>
+ // CHECK: shufflevector{{.*}}<i32 4, i32 5, i32 6, i32 7>
return _mm256_extractf128_si256(a, 1);
}
__m128i test0_mm256_extracti128_si256_0(__m256i a) {
// CHECK-LABEL: test0_mm256_extracti128_si256
- // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> zeroinitializer, <2 x i32> <i32 0, i32 1>
+ // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
return _mm256_extracti128_si256(a, 0);
}
__m128i test1_mm256_extracti128_si256_1(__m256i a) {
// CHECK-LABEL: test1_mm256_extracti128_si256
- // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> zeroinitializer, <2 x i32> <i32 2, i32 3>
+ // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
return _mm256_extracti128_si256(a, 1);
}
// Immediate should be truncated to one bit.
__m128i test2_mm256_extracti128_si256(__m256i a) {
// CHECK-LABEL: test2_mm256_extracti128_si256
- // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> zeroinitializer, <2 x i32> <i32 0, i32 1>
- return _mm256_extracti128_si256(a, 2);
+ // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
+ return _mm256_extracti128_si256(a, 0);
}
__m256i test_mm256_hadd_epi16(__m256i a, __m256i b) {
__m256i test0_mm256_inserti128_si256(__m256i a, __m128i b) {
// CHECK-LABEL: test0_mm256_inserti128_si256
- // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+ // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
return _mm256_inserti128_si256(a, b, 0);
}
__m256i test1_mm256_inserti128_si256(__m256i a, __m128i b) {
// CHECK-LABEL: test1_mm256_inserti128_si256
- // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+ // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
return _mm256_inserti128_si256(a, b, 1);
}
// Immediate should be truncated to one bit.
__m256i test2_mm256_inserti128_si256(__m256i a, __m128i b) {
// CHECK-LABEL: test2_mm256_inserti128_si256
- // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+ // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
- return _mm256_inserti128_si256(a, b, 2);
+ return _mm256_inserti128_si256(a, b, 0);
}
__m256i test_mm256_madd_epi16(__m256i a, __m256i b) {
__m256 test_mm512_extractf32x8_ps(__m512 __A) {
// CHECK-LABEL: @test_mm512_extractf32x8_ps
- // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> zeroinitializer, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
return _mm512_extractf32x8_ps(__A, 1);
}
__m256 test_mm512_mask_extractf32x8_ps(__m256 __W, __mmask8 __U, __m512 __A) {
// CHECK-LABEL: @test_mm512_mask_extractf32x8_ps
- // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> zeroinitializer, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm512_mask_extractf32x8_ps(__W, __U, __A, 1);
}
__m256 test_mm512_maskz_extractf32x8_ps(__mmask8 __U, __m512 __A) {
// CHECK-LABEL: @test_mm512_maskz_extractf32x8_ps
- // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> zeroinitializer, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm512_maskz_extractf32x8_ps(__U, __A, 1);
}
__m128d test_mm512_extractf64x2_pd(__m512d __A) {
// CHECK-LABEL: @test_mm512_extractf64x2_pd
- // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> zeroinitializer, <2 x i32> <i32 6, i32 7>
+ // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <2 x i32> <i32 6, i32 7>
return _mm512_extractf64x2_pd(__A, 3);
}
__m128d test_mm512_mask_extractf64x2_pd(__m128d __W, __mmask8 __U, __m512d __A) {
// CHECK-LABEL: @test_mm512_mask_extractf64x2_pd
- // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> zeroinitializer, <2 x i32> <i32 6, i32 7>
+ // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <2 x i32> <i32 6, i32 7>
// CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
return _mm512_mask_extractf64x2_pd(__W, __U, __A, 3);
}
__m128d test_mm512_maskz_extractf64x2_pd(__mmask8 __U, __m512d __A) {
// CHECK-LABEL: @test_mm512_maskz_extractf64x2_pd
- // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> zeroinitializer, <2 x i32> <i32 6, i32 7>
+ // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <2 x i32> <i32 6, i32 7>
// CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
return _mm512_maskz_extractf64x2_pd(__U, __A, 3);
}
__m256i test_mm512_extracti32x8_epi32(__m512i __A) {
// CHECK-LABEL: @test_mm512_extracti32x8_epi32
- // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
return _mm512_extracti32x8_epi32(__A, 1);
}
__m256i test_mm512_mask_extracti32x8_epi32(__m256i __W, __mmask8 __U, __m512i __A) {
// CHECK-LABEL: @test_mm512_mask_extracti32x8_epi32
- // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm512_mask_extracti32x8_epi32(__W, __U, __A, 1);
}
__m256i test_mm512_maskz_extracti32x8_epi32(__mmask8 __U, __m512i __A) {
// CHECK-LABEL: @test_mm512_maskz_extracti32x8_epi32
- // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm512_maskz_extracti32x8_epi32(__U, __A, 1);
}
__m128i test_mm512_extracti64x2_epi64(__m512i __A) {
// CHECK-LABEL: @test_mm512_extracti64x2_epi64
- // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> zeroinitializer, <2 x i32> <i32 6, i32 7>
+ // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <2 x i32> <i32 6, i32 7>
return _mm512_extracti64x2_epi64(__A, 3);
}
__m128i test_mm512_mask_extracti64x2_epi64(__m128i __W, __mmask8 __U, __m512i __A) {
// CHECK-LABEL: @test_mm512_mask_extracti64x2_epi64
- // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> zeroinitializer, <2 x i32> <i32 6, i32 7>
+ // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <2 x i32> <i32 6, i32 7>
// CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
return _mm512_mask_extracti64x2_epi64(__W, __U, __A, 3);
}
__m128i test_mm512_maskz_extracti64x2_epi64(__mmask8 __U, __m512i __A) {
// CHECK-LABEL: @test_mm512_maskz_extracti64x2_epi64
- // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> zeroinitializer, <2 x i32> <i32 6, i32 7>
+ // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <2 x i32> <i32 6, i32 7>
// CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
return _mm512_maskz_extracti64x2_epi64(__U, __A, 3);
}
__m256d test_mm512_extractf64x4_pd(__m512d a)
{
// CHECK-LABEL: @test_mm512_extractf64x4_pd
- // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> zeroinitializer, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
return _mm512_extractf64x4_pd(a, 1);
}
__m256d test_mm512_mask_extractf64x4_pd(__m256d __W,__mmask8 __U,__m512d __A){
// CHECK-LABEL:@test_mm512_mask_extractf64x4_pd
- // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> zeroinitializer, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
return _mm512_mask_extractf64x4_pd( __W, __U, __A, 1);
}
__m256d test_mm512_maskz_extractf64x4_pd(__mmask8 __U,__m512d __A){
// CHECK-LABEL:@test_mm512_maskz_extractf64x4_pd
- // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> zeroinitializer, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
return _mm512_maskz_extractf64x4_pd( __U, __A, 1);
}
__m128 test_mm512_extractf32x4_ps(__m512 a)
{
// CHECK-LABEL: @test_mm512_extractf32x4_ps
- // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> zeroinitializer, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
return _mm512_extractf32x4_ps(a, 1);
}
__m128 test_mm512_mask_extractf32x4_ps(__m128 __W, __mmask8 __U,__m512d __A){
// CHECK-LABEL:@test_mm512_mask_extractf32x4_ps
- // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> zeroinitializer, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
return _mm512_mask_extractf32x4_ps( __W, __U, __A, 1);
}
__m128 test_mm512_maskz_extractf32x4_ps( __mmask8 __U,__m512d __A){
// CHECK-LABEL:@test_mm512_maskz_extractf32x4_ps
- // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> zeroinitializer, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
return _mm512_maskz_extractf32x4_ps( __U, __A, 1);
}
__m128i test_mm512_extracti32x4_epi32(__m512i __A) {
// CHECK-LABEL: @test_mm512_extracti32x4_epi32
- // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
+ // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
return _mm512_extracti32x4_epi32(__A, 3);
}
__m128i test_mm512_mask_extracti32x4_epi32(__m128i __W, __mmask8 __U, __m512i __A) {
// CHECK-LABEL: @test_mm512_mask_extracti32x4_epi32
- // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
+ // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm512_mask_extracti32x4_epi32(__W, __U, __A, 3);
}
__m128i test_mm512_maskz_extracti32x4_epi32(__mmask8 __U, __m512i __A) {
// CHECK-LABEL: @test_mm512_maskz_extracti32x4_epi32
- // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
+ // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm512_maskz_extracti32x4_epi32(__U, __A, 3);
}
__m256i test_mm512_extracti64x4_epi64(__m512i __A) {
// CHECK-LABEL: @test_mm512_extracti64x4_epi64
- // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> zeroinitializer, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
return _mm512_extracti64x4_epi64(__A, 1);
}
__m256i test_mm512_mask_extracti64x4_epi64(__m256i __W, __mmask8 __U, __m512i __A) {
// CHECK-LABEL: @test_mm512_mask_extracti64x4_epi64
- // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> zeroinitializer, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
return _mm512_mask_extracti64x4_epi64(__W, __U, __A, 1);
}
__m256i test_mm512_maskz_extracti64x4_epi64(__mmask8 __U, __m512i __A) {
// CHECK-LABEL: @test_mm512_maskz_extracti64x4_epi64
- // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> zeroinitializer, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
return _mm512_maskz_extracti64x4_epi64(__U, __A, 1);
}
__m128 test_mm256_extractf32x4_ps(__m256 __A) {
// CHECK-LABEL: @test_mm256_extractf32x4_ps
- // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> zeroinitializer, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
return _mm256_extractf32x4_ps(__A, 1);
}
__m128 test_mm256_mask_extractf32x4_ps(__m128 __W, __mmask8 __U, __m256 __A) {
// CHECK-LABEL: @test_mm256_mask_extractf32x4_ps
- // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> zeroinitializer, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
return _mm256_mask_extractf32x4_ps(__W, __U, __A, 1);
}
__m128 test_mm256_maskz_extractf32x4_ps(__mmask8 __U, __m256 __A) {
// CHECK-LABEL: @test_mm256_maskz_extractf32x4_ps
- // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> zeroinitializer, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
return _mm256_maskz_extractf32x4_ps(__U, __A, 1);
}
__m128i test_mm256_extracti32x4_epi32(__m256i __A) {
// CHECK-LABEL: @test_mm256_extracti32x4_epi32
- // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
return _mm256_extracti32x4_epi32(__A, 1);
}
__m128i test_mm256_mask_extracti32x4_epi32(__m128i __W, __mmask8 __U, __m256i __A) {
// CHECK-LABEL: @test_mm256_mask_extracti32x4_epi32
- // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm256_mask_extracti32x4_epi32(__W, __U, __A, 1);
}
__m128i test_mm256_maskz_extracti32x4_epi32(__mmask8 __U, __m256i __A) {
// CHECK-LABEL: @test_mm256_maskz_extracti32x4_epi32
- // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm256_maskz_extracti32x4_epi32(__U, __A, 1);
}
__m128d test_mm256_extractf64x2_pd(__m256d __A) {
// CHECK-LABEL: @test_mm256_extractf64x2_pd
- // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> zeroinitializer, <2 x i32> <i32 2, i32 3>
+ // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> undef, <2 x i32> <i32 2, i32 3>
return _mm256_extractf64x2_pd(__A, 1);
}
__m128d test_mm256_mask_extractf64x2_pd(__m128d __W, __mmask8 __U, __m256d __A) {
// CHECK-LABEL: @test_mm256_mask_extractf64x2_pd
- // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> zeroinitializer, <2 x i32> <i32 2, i32 3>
+ // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> undef, <2 x i32> <i32 2, i32 3>
// CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
return _mm256_mask_extractf64x2_pd(__W, __U, __A, 1);
}
__m128d test_mm256_maskz_extractf64x2_pd(__mmask8 __U, __m256d __A) {
// CHECK-LABEL: @test_mm256_maskz_extractf64x2_pd
- // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> zeroinitializer, <2 x i32> <i32 2, i32 3>
+ // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> undef, <2 x i32> <i32 2, i32 3>
// CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
return _mm256_maskz_extractf64x2_pd(__U, __A, 1);
}
__m128i test_mm256_extracti64x2_epi64(__m256i __A) {
// CHECK-LABEL: @test_mm256_extracti64x2_epi64
- // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> zeroinitializer, <2 x i32> <i32 2, i32 3>
+ // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
return _mm256_extracti64x2_epi64(__A, 1);
}
__m128i test_mm256_mask_extracti64x2_epi64(__m128i __W, __mmask8 __U, __m256i __A) {
// CHECK-LABEL: @test_mm256_mask_extracti64x2_epi64
- // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> zeroinitializer, <2 x i32> <i32 2, i32 3>
+ // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
// CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
return _mm256_mask_extracti64x2_epi64(__W, __U, __A, 1);
}
__m128i test_mm256_maskz_extracti64x2_epi64(__mmask8 __U, __m256i __A) {
// CHECK-LABEL: @test_mm256_maskz_extracti64x2_epi64
- // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> zeroinitializer, <2 x i32> <i32 2, i32 3>
+ // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
// CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
return _mm256_maskz_extracti64x2_epi64(__U, __A, 1);
}