TARGET_BUILTIN(__builtin_ia32_fixupimmps256_mask, "V8fV8fV8fV8iIiUc", "nc", "avx512vl")
TARGET_BUILTIN(__builtin_ia32_fixupimmps256_maskz, "V8fV8fV8fV8iIiUc", "nc", "avx512vl")
TARGET_BUILTIN(__builtin_ia32_loadapd128_mask, "V2dV2d*V2dUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_loadsd128_mask, "V8dV8d*V8dUc", "n", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_loadsd128_mask, "V2dV2d*V2dUc", "n", "avx512f")
TARGET_BUILTIN(__builtin_ia32_loadapd256_mask, "V4dV4d*V4dUc", "n", "avx512vl")
TARGET_BUILTIN(__builtin_ia32_loadaps128_mask, "V4fV4f*V4fUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_loadss128_mask, "V16fV16f*V16fUs", "n", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_loadss128_mask, "V4fV4f*V4fUc", "n", "avx512f")
TARGET_BUILTIN(__builtin_ia32_loadaps256_mask, "V8fV8f*V8fUc", "n", "avx512vl")
TARGET_BUILTIN(__builtin_ia32_loaddqudi128_mask, "V2LLiV2LLi*V2LLiUc", "n", "avx512vl")
TARGET_BUILTIN(__builtin_ia32_loaddqudi256_mask, "V4LLiV4LLi*V4LLiUc", "n", "avx512vl")
TARGET_BUILTIN(__builtin_ia32_storedquqi128_mask, "vV16c*V16cUs", "n", "avx512vl,avx512bw")
TARGET_BUILTIN(__builtin_ia32_storedquqi256_mask, "vV32c*V32cUi", "n", "avx512vl,avx512bw")
TARGET_BUILTIN(__builtin_ia32_storeapd128_mask, "vV2d*V2dUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_storesd128_mask, "vV8d*V8dUc", "n", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_storesd128_mask, "vV2d*V2dUc", "n", "avx512f")
TARGET_BUILTIN(__builtin_ia32_storeapd256_mask, "vV4d*V4dUc", "n", "avx512vl")
TARGET_BUILTIN(__builtin_ia32_storeaps128_mask, "vV4f*V4fUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_storess128_mask, "vV16f*V16fUs", "n", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_storess128_mask, "vV4f*V4fUc", "n", "avx512f")
TARGET_BUILTIN(__builtin_ia32_storeaps256_mask, "vV8f*V8fUc", "n", "avx512vl")
TARGET_BUILTIN(__builtin_ia32_storedqudi128_mask, "vV2LLi*V2LLiUc", "n", "avx512vl")
TARGET_BUILTIN(__builtin_ia32_storedqudi256_mask, "vV4LLi*V4LLiUc", "n", "avx512vl")
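Side note, not part of the patch: the prototype strings above use the standard Builtins.def encoding. The first type is the return type, "V<n><t>" is a vector of <n> elements of type <t> ("f" float, "d" double, "c" char, "LLi" long long), a trailing "*" turns the preceding type into a pointer, "U" marks an unsigned integer, and "I" requires a constant integer argument; "n"/"c" in the attribute string mean nothrow/const, and the last field lists the required target features. Decoded with illustrative parameter names, the corrected loadsd entry is roughly:

    /* "V2dV2d*V2dUc", feature avx512f */
    __v2df __builtin_ia32_loadsd128_mask(__v2df *ptr, __v2df passthru, unsigned char mask);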
case X86::BI__builtin_ia32_storess128_mask:
case X86::BI__builtin_ia32_storesd128_mask: {
- return EmitX86MaskedStore(*this, Ops, 16);
+ return EmitX86MaskedStore(*this, Ops, 1);
}
case X86::BI__builtin_ia32_vpopcntb_128:
case X86::BI__builtin_ia32_vpopcntd_128:
case X86::BI__builtin_ia32_loadss128_mask:
case X86::BI__builtin_ia32_loadsd128_mask:
- return EmitX86MaskedLoad(*this, Ops, 16);
+ return EmitX86MaskedLoad(*this, Ops, 1);
case X86::BI__builtin_ia32_loadaps128_mask:
case X86::BI__builtin_ia32_loadaps256_mask:
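For context, an inference not spelled out in the patch: the trailing integer passed to EmitX86MaskedStore/EmitX86MaskedLoad is the alignment in bytes assumed for the pointer operand. It drops from 16 to 1 along with the type change because the ss/sd intrinsics take a bare float*/double* that carries no 16-byte guarantee. The emitted IR should then look roughly like the masked load the tests below check for, with an explicit alignment operand of 1 (the value names here are made up):

    call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %ptr, i32 1, <4 x i1> %mask, <4 x float> %passthru)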
static __inline__ void __DEFAULT_FN_ATTRS
_mm_mask_store_ss (float * __W, __mmask8 __U, __m128 __A)
{
- __builtin_ia32_storess128_mask ((__v16sf *)__W,
- (__v16sf) _mm512_castps128_ps512(__A),
- (__mmask16) __U & (__mmask16)1);
+ __builtin_ia32_storess128_mask ((__v4sf *)__W, (__v4sf)__A, __U & 1);
}
static __inline__ void __DEFAULT_FN_ATTRS
_mm_mask_store_sd (double * __W, __mmask8 __U, __m128d __A)
{
- __builtin_ia32_storesd128_mask ((__v8df *)__W,
- (__v8df) _mm512_castpd128_pd512(__A),
- (__mmask8) __U & 1);
+ __builtin_ia32_storesd128_mask ((__v2df *)__W, (__v2df)__A, __U & 1);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
(__v4sf) {0.0, 0.0, 0.0, 0.0},
0, 4, 4, 4);
- return (__m128) __builtin_shufflevector(
- __builtin_ia32_loadss128_mask ((__v16sf *) __A,
- (__v16sf) _mm512_castps128_ps512(src),
- (__mmask16) __U & 1),
- _mm512_undefined_ps(), 0, 1, 2, 3);
+ return (__m128) __builtin_ia32_loadss128_mask ((__v4sf *) __A, src, __U & 1);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_load_ss (__mmask8 __U, const float* __A)
{
- return (__m128) __builtin_shufflevector(
- __builtin_ia32_loadss128_mask ((__v16sf *) __A,
- (__v16sf) _mm512_setzero_ps(),
- (__mmask16) __U & 1),
- _mm512_undefined_ps(), 0, 1, 2, 3);
+ return (__m128)__builtin_ia32_loadss128_mask ((__v4sf *) __A,
+ (__v4sf) _mm_setzero_ps(),
+ __U & 1);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
__m128d src = (__v2df) __builtin_shufflevector((__v2df) __W,
(__v2df) {0.0, 0.0}, 0, 2);
- return (__m128d) __builtin_shufflevector(
- __builtin_ia32_loadsd128_mask ((__v8df *) __A,
- (__v8df) _mm512_castpd128_pd512(src),
- (__mmask8) __U & 1),
- _mm512_undefined_pd(), 0, 1);
+ return (__m128d) __builtin_ia32_loadsd128_mask ((__v2df *) __A, src, __U & 1);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_load_sd (__mmask8 __U, const double* __A)
{
- return (__m128d) __builtin_shufflevector(
- __builtin_ia32_loadsd128_mask ((__v8df *) __A,
- (__v8df) _mm512_setzero_pd(),
- (__mmask8) __U & 1),
- _mm512_undefined_pd(), 0, 1);
+ return (__m128d) __builtin_ia32_loadsd128_mask ((__v2df *) __A,
+ (__v2df) _mm_setzero_pd(),
+ __U & 1);
}
#define _mm512_shuffle_epi32(A, I) __extension__ ({ \
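A quick way to sanity-check the new 128-bit forms outside the lit tests; this is purely illustrative, not part of the patch, and the variable names are made up. Compile with -mavx512f and run on an AVX-512 capable machine:

    #include <immintrin.h>
    #include <stdio.h>

    int main(void) {
      float in = 42.0f, out = 0.0f;
      __m128 pass = _mm_set_ss(-1.0f);

      /* Mask bit 0 set: the low lane is loaded from memory (42.0). */
      __m128 a = _mm_mask_load_ss(pass, 1, &in);
      /* Mask bit 0 clear with the zeroing form: the low lane is 0.0. */
      __m128 b = _mm_maskz_load_ss(0, &in);

      /* Mask bit 0 set: the low lane of a (42.0) is written to out. */
      _mm_mask_store_ss(&out, 1, a);

      printf("%f %f %f\n", _mm_cvtss_f32(a), _mm_cvtss_f32(b), out);
      return 0;
    }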
void test_mm_mask_store_ss(float * __P, __mmask8 __U, __m128 __A)
{
// CHECK-LABEL: @test_mm_mask_store_ss
- // CHECK: call void @llvm.masked.store.v16f32.p0v16f32(
+ // CHECK: call void @llvm.masked.store.v4f32.p0v4f32(
_mm_mask_store_ss(__P, __U, __A);
}
void test_mm_mask_store_sd(double * __P, __mmask8 __U, __m128d __A)
{
// CHECK-LABEL: @test_mm_mask_store_sd
- // CHECK: call void @llvm.masked.store.v8f64.p0v8f64(
+ // CHECK: call void @llvm.masked.store.v2f64.p0v2f64(
_mm_mask_store_sd(__P, __U, __A);
}
__m128 test_mm_mask_load_ss(__m128 __A, __mmask8 __U, const float* __W)
{
// CHECK-LABEL: @test_mm_mask_load_ss
- // CHECK: call <16 x float> @llvm.masked.load.v16f32.p0v16f32(
+ // CHECK: call <4 x float> @llvm.masked.load.v4f32.p0v4f32(
return _mm_mask_load_ss(__A, __U, __W);
}
__m128 test_mm_maskz_load_ss (__mmask8 __U, const float * __W)
{
// CHECK-LABEL: @test_mm_maskz_load_ss
- // CHECK: call <16 x float> @llvm.masked.load.v16f32.p0v16f32(
+ // CHECK: call <4 x float> @llvm.masked.load.v4f32.p0v4f32(
return _mm_maskz_load_ss (__U, __W);
}
__m128d test_mm_mask_load_sd (__m128d __A, __mmask8 __U, const double * __W)
{
// CHECK-LABEL: @test_mm_mask_load_sd
- // CHECK: call <8 x double> @llvm.masked.load.v8f64.p0v8f64(
+ // CHECK: call <2 x double> @llvm.masked.load.v2f64.p0v2f64(
return _mm_mask_load_sd (__A, __U, __W);
}
__m128d test_mm_maskz_load_sd (__mmask8 __U, const double * __W)
{
// CHECK-LABEL: @test_mm_maskz_load_sd
- // CHECK: call <8 x double> @llvm.masked.load.v8f64.p0v8f64(
+ // CHECK: call <2 x double> @llvm.masked.load.v2f64.p0v2f64(
return _mm_maskz_load_sd (__U, __W);
}