return _mm256_extract_epi32(A, 7);
}
+#if __x86_64__
long long test_mm256_extract_epi64(__m256i A) {
// Codegen test: extracting lane 3 of a 256-bit integer vector as i64 should
// lower to a plain IR extractelement (no target intrinsic call).
// CHECK-LABEL: test_mm256_extract_epi64
// CHECK: extractelement <4 x i64> %{{.*}}, {{i32|i64}} 3
return _mm256_extract_epi64(A, 3);
}
+#endif
__m128d test_mm256_extractf128_pd(__m256d A) {
// CHECK-LABEL: test_mm256_extractf128_pd
return _mm256_insert_epi32(x, b, 5);
}
+#if __x86_64__
__m256i test_mm256_insert_epi64(__m256i x, long long b) {
// Codegen test: inserting an i64 scalar into lane 2 of a 256-bit vector
// should lower to a plain IR insertelement.
// CHECK-LABEL: test_mm256_insert_epi64
// CHECK: insertelement <4 x i64> %{{.*}}, i64 %{{.*}}, {{i32|i64}} 2
return _mm256_insert_epi64(x, b, 2);
}
+#endif
__m256d test_mm256_insertf128_pd(__m256d A, __m128d B) {
// CHECK-LABEL: test_mm256_insertf128_pd
return _mm_tzcnt_32(__X);
}
+#ifdef __x86_64__
// Codegen test: __andn_u64 computes (~X) & Y and must lower to an
// xor-with-minus-one followed by an and — no intrinsic call.  The original
// block's body called _mm_tzcnt_64 and checked for llvm.cttz.i64, which
// contradicts the function name, the CHECK-LABEL, and the unused __Y
// parameter; restore the ANDN body and matching CHECK lines.
unsigned long long test__andn_u64(unsigned long __X, unsigned long __Y) {
// CHECK-LABEL: test__andn_u64
// CHECK: xor i64 %{{.*}}, -1
// CHECK: and i64 %{{.*}}, %{{.*}}
return __andn_u64(__X, __Y);
}
+#endif
// Intel intrinsics
return _tzcnt_u32(__X);
}
+#ifdef __x86_64__
// Codegen test for the Intel-style spelling: _andn_u64 computes (~X) & Y and
// lowers to xor + and.  The original body called _tzcnt_u64 and checked for
// llvm.cttz.i64, contradicting the name, the label, and the unused __Y
// parameter; restore the ANDN body and matching CHECK lines.
unsigned long long test_andn_u64(unsigned long __X, unsigned long __Y) {
// CHECK-LABEL: test_andn_u64
// CHECK: xor i64 %{{.*}}, -1
// CHECK: and i64 %{{.*}}, %{{.*}}
return _andn_u64(__X, __Y);
}
+#endif
return _mulx_u32(__X, __Y, __P);
}
+#ifdef __x86_64__
// Codegen test: _bzhi_u64 lowers to the llvm.x86.bmi.bzhi.64 intrinsic.
// The original block had unreachable code after the first return (a stray
// "mul i128" CHECK and a _mulx_u64 return belonging to a different test);
// drop the dead tail and add the missing CHECK-LABEL anchor.
unsigned long long test_bzhi_u64(unsigned long long __X, unsigned long long __Y) {
// CHECK-LABEL: test_bzhi_u64
// CHECK: @llvm.x86.bmi.bzhi.64
return _bzhi_u64(__X, __Y);
}
+#endif
return __popcntd(__X);
}
+#ifdef __x86_64__
#ifdef __POPCNT__
// Codegen test: _mm_popcnt_u64 lowers to llvm.ctpop.i64.  The original body
// called __popcntq (a different test's body) and carried an unprefixed
// "//CHECK:" line; since this function only exists under #ifdef __POPCNT__,
// an unprefixed CHECK would fail the FileCheck run compiled without POPCNT,
// so only the CHECK-POPCNT prefix is kept.
long long test_mm_popcnt_u64(unsigned long long __X) {
//CHECK-POPCNT: call i64 @llvm.ctpop.i64
return _mm_popcnt_u64(__X);
}
+#endif
// CHECK: store i32
}
+#if __x86_64__
// Codegen test: _rdrand64_step lowers to the llvm.x86.rdrand.64 intrinsic,
// whose i64 result is stored through *p and whose i32 carry flag is returned.
int rdrand64(unsigned long long *p) {
return _rdrand64_step(p);
// CHECK: @rdrand64
// CHECK: call { i64, i32 } @llvm.x86.rdrand.64
// CHECK: store i64
}
+#endif
// Codegen test: _rdseed16_step lowers to the llvm.x86.rdseed.16 intrinsic.
// The original block checked "store i32" — wrong for a 16-bit result written
// through an unsigned short*; the stored value is i16 (the i32 member of the
// result pair is the carry flag, which is returned, not stored).  Also add
// the label and call CHECKs present in the sibling rdseed64 test.
int rdseed16(unsigned short *p) {
return _rdseed16_step(p);
// CHECK: @rdseed16
// CHECK: call { i16, i32 } @llvm.x86.rdseed.16
// CHECK: store i16
}
+#if __x86_64__
// Codegen test: _rdseed64_step lowers to the llvm.x86.rdseed.64 intrinsic,
// whose i64 result is stored through *p and whose i32 carry flag is returned.
int rdseed64(unsigned long long *p) {
return _rdseed64_step(p);
// CHECK: @rdseed64
// CHECK: call { i64, i32 } @llvm.x86.rdseed.64
// CHECK: store i64
}
+#endif
return _mm_cvtsi32_ss(A, B);
}
+#ifdef __x86_64__
__m128 test_mm_cvtsi64_ss(__m128 A, long long B) {
// Codegen test: converting an i64 scalar and inserting it into lane 0 of a
// float vector lowers to sitofp + insertelement.
// CHECK-LABEL: test_mm_cvtsi64_ss
// CHECK: sitofp i64 %{{.*}} to float
// CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
return _mm_cvtsi64_ss(A, B);
}
+#endif
float test_mm_cvtss_f32(__m128 A) {
// CHECK-LABEL: test_mm_cvtss_f32
return _mm_cvtss_si32(A);
}
+#ifdef __x86_64__
long long test_mm_cvtss_si64(__m128 A) {
// Codegen test: rounding float->i64 conversion keeps the target intrinsic
// (rounding-mode dependent, so it cannot be lowered to plain fptosi).
// CHECK-LABEL: test_mm_cvtss_si64
// CHECK: call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %{{.*}})
return _mm_cvtss_si64(A);
}
+#endif
int test_mm_cvtt_ss2si(__m128 A) {
// CHECK-LABEL: test_mm_cvtt_ss2si
return _mm_cvttss_si32(A);
}
+#ifdef __x86_64__
long long test_mm_cvttss_si64(__m128 A) {
// Codegen test: truncating float->i64 conversion keeps the cvttss2si64
// target intrinsic.
// CHECK-LABEL: test_mm_cvttss_si64
// CHECK: call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %{{.*}})
return _mm_cvttss_si64(A);
}
+#endif
__m128 test_mm_div_ps(__m128 A, __m128 B) {
// CHECK-LABEL: test_mm_div_ps
return _mm_cvtsd_si32(A);
}
+#ifdef __x86_64__
long long test_mm_cvtsd_si64(__m128d A) {
// Codegen test: rounding double->i64 conversion keeps the cvtsd2si64
// target intrinsic.
// CHECK-LABEL: test_mm_cvtsd_si64
// CHECK: call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %{{.*}})
return _mm_cvtsd_si64(A);
}
+#endif
__m128 test_mm_cvtsd_ss(__m128 A, __m128d B) {
// CHECK-LABEL: test_mm_cvtsd_ss
return _mm_cvtsi128_si32(A);
}
+#ifdef __x86_64__
long long test_mm_cvtsi128_si64(__m128i A) {
// Codegen test: reading the low i64 lane of a 128-bit vector lowers to a
// plain extractelement of lane 0.
// CHECK-LABEL: test_mm_cvtsi128_si64
// CHECK: extractelement <2 x i64> %{{.*}}, i32 0
return _mm_cvtsi128_si64(A);
}
+#endif
__m128d test_mm_cvtsi32_sd(__m128d A, int B) {
// CHECK-LABEL: test_mm_cvtsi32_sd
return _mm_cvtsi32_si128(A);
}
+#ifdef __x86_64__
// Codegen test: _mm_cvtsi64_sd converts an i64 scalar to double and inserts
// it into lane 0 of A.  The original block returned _mm_cvtsi64_si128(A) —
// the wrong intrinsic, with the __m128d argument A where an i64 belongs and
// parameter B unused — and its second CHECK expected the <2 x i64>
// insertelement of the si128 test.  Restore the cvtsi64_sd body and the
// matching <2 x double> CHECK.
__m128d test_mm_cvtsi64_sd(__m128d A, long long B) {
// CHECK-LABEL: test_mm_cvtsi64_sd
// CHECK: sitofp i64 %{{.*}} to double
// CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
return _mm_cvtsi64_sd(A, B);
}
+#endif
__m128d test_mm_cvtss_sd(__m128d A, __m128 B) {
// CHECK-LABEL: test_mm_cvtss_sd
return _mm_cvttsd_si32(A);
}
+#ifdef __x86_64__
long long test_mm_cvttsd_si64(__m128d A) {
// Codegen test: truncating double->i64 conversion keeps the cvttsd2si64
// target intrinsic.
// CHECK-LABEL: test_mm_cvttsd_si64
// CHECK: call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %{{.*}})
return _mm_cvttsd_si64(A);
}
+#endif
__m128d test_mm_div_pd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_div_pd
_mm_stream_si32(A, B);
}
+#ifdef __x86_64__
void test_mm_stream_si64(long long *A, long long B) {
// Codegen test: the non-temporal 64-bit store lowers to a plain i64 store
// carrying !nontemporal metadata (align 1 per the intrinsic's contract).
// CHECK-LABEL: test_mm_stream_si64
// CHECK: store i64 %{{.*}}, i64* %{{.*}}, align 1, !nontemporal
_mm_stream_si64(A, B);
}
+#endif
void test_mm_stream_si128(__m128i *A, __m128i B) {
// CHECK-LABEL: test_mm_stream_si128
return _mm_extract_epi32(x, 1);
}
+#ifdef __x86_64__
long long test_mm_extract_epi64(__m128i x) {
// Codegen test: extracting lane 1 of a 128-bit vector as i64 lowers to a
// plain IR extractelement.
// CHECK-LABEL: test_mm_extract_epi64
// CHECK: extractelement <2 x i64> %{{.*}}, {{i32|i64}} 1
return _mm_extract_epi64(x, 1);
}
+#endif
int test_mm_extract_ps(__m128 x) {
// CHECK-LABEL: test_mm_extract_ps
return _mm_insert_epi32(x, b, 1);
}
+#ifdef __x86_64__
__m128i test_mm_insert_epi64(__m128i x, long long b) {
// Codegen test: inserting an i64 scalar into lane 1 of a 128-bit vector
// lowers to a plain IR insertelement.
// CHECK-LABEL: test_mm_insert_epi64
// CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, {{i32|i64}} 1
return _mm_insert_epi64(x, b, 1);
}
+#endif
__m128 test_mm_insert_ps(__m128 x, __m128 y) {
// CHECK-LABEL: test_mm_insert_ps
return _mm_crc32_u32(CRC, V);
}
+#ifdef __x86_64__
unsigned long long test_mm_crc32_u64(unsigned long long CRC, unsigned long long V) {
// Codegen test: the 64-bit CRC32 accumulate keeps its target intrinsic.
// CHECK-LABEL: test_mm_crc32_u64
// CHECK: call i64 @llvm.x86.sse42.crc32.64.64(i64 %{{.*}}, i64 %{{.*}})
return _mm_crc32_u64(CRC, V);
}
+#endif
return __bextri_u32(a, 1);
}
+#ifdef __x86_64__
// Codegen test: __bextri_u64 keeps the TBM bextri target intrinsic with the
// immediate control operand folded to a constant (0x7fffffffff ==
// 549755813887).  The original block carried a second, stale CHECK expecting
// "i64 2" that corresponds to no call in the body (it belonged to a separate
// test function); drop it so FileCheck matches the single call emitted here.
unsigned long long test__bextri_u64(unsigned long long a) {
// CHECK-LABEL: test__bextri_u64
// CHECK: call i64 @llvm.x86.tbm.bextri.u64(i64 %{{.*}}, i64 549755813887)
return __bextri_u64(a, 0x7fffffffffLL);
}
+#endif
unsigned int test__blcfill_u32(unsigned int a) {
// CHECK-LABEL: test__blcfill_u32
return __blcfill_u32(a);
}
+#ifdef __x86_64__
unsigned long long test__blcfill_u64(unsigned long long a) {
// Codegen test: __blcfill_u64 (a & (a + 1)) lowers to open-coded add + and.
// CHECK-LABEL: test__blcfill_u64
// CHECK: [[TMP:%.*]] = add i64 %{{.*}}, 1
// CHECK: %{{.*}} = and i64 %{{.*}}, [[TMP]]
return __blcfill_u64(a);
}
+#endif
unsigned int test__blci_u32(unsigned int a) {
// CHECK-LABEL: test__blci_u32
return __blci_u32(a);
}
+#ifdef __x86_64__
// Codegen test: __blci_u64 (a | ~(a + 1)) lowers to add, xor-with-minus-one,
// then or.  The original block referenced [[TMP2]] in its final CHECK without
// ever defining it — the middle CHECK line binding TMP2 (the xor) was
// missing, which makes FileCheck fail on an undefined variable; restore it.
unsigned long long test__blci_u64(unsigned long long a) {
// CHECK-LABEL: test__blci_u64
// CHECK: [[TMP1:%.*]] = add i64 %{{.*}}, 1
// CHECK: [[TMP2:%.*]] = xor i64 [[TMP1]], -1
// CHECK: %{{.*}} = or i64 %{{.*}}, [[TMP2]]
return __blci_u64(a);
}
+#endif
unsigned int test__blcic_u32(unsigned int a) {
// CHECK-LABEL: test__blcic_u32
return __blcic_u32(a);
}
+#ifdef __x86_64__
// Codegen test: __blcic_u64 (~a & (a + 1)) lowers to xor-with-minus-one,
// add, then and.  The original block's final CHECK used [[TMP2]] without a
// defining CHECK line (the add was missing), which makes FileCheck fail on
// an undefined variable; restore the binding line.
unsigned long long test__blcic_u64(unsigned long long a) {
// CHECK-LABEL: test__blcic_u64
// CHECK: [[TMP1:%.*]] = xor i64 %{{.*}}, -1
// CHECK: [[TMP2:%.*]] = add i64 %{{.*}}, 1
// CHECK-NEXT: {{.*}} = and i64 [[TMP1]], [[TMP2]]
return __blcic_u64(a);
}
+#endif
unsigned int test__blcmsk_u32(unsigned int a) {
// CHECK-LABEL: test__blcmsk_u32
return __blcmsk_u32(a);
}
+#ifdef __x86_64__
unsigned long long test__blcmsk_u64(unsigned long long a) {
// Codegen test: __blcmsk_u64 (a ^ (a + 1)) lowers to open-coded add + xor.
// CHECK-LABEL: test__blcmsk_u64
// CHECK: [[TMP:%.*]] = add i64 %{{.*}}, 1
// CHECK-NEXT: {{.*}} = xor i64 %{{.*}}, [[TMP]]
return __blcmsk_u64(a);
}
+#endif
unsigned int test__blcs_u32(unsigned int a) {
// CHECK-LABEL: test__blcs_u32
return __blcs_u32(a);
}
+#ifdef __x86_64__
unsigned long long test__blcs_u64(unsigned long long a) {
// Codegen test: __blcs_u64 (a | (a + 1)) lowers to open-coded add + or.
// CHECK-LABEL: test__blcs_u64
// CHECK: [[TMP:%.*]] = add i64 %{{.*}}, 1
// CHECK-NEXT: {{.*}} = or i64 %{{.*}}, [[TMP]]
return __blcs_u64(a);
}
+#endif
unsigned int test__blsfill_u32(unsigned int a) {
// CHECK-LABEL: test__blsfill_u32
return __blsfill_u32(a);
}
+#ifdef __x86_64__
unsigned long long test__blsfill_u64(unsigned long long a) {
// Codegen test: __blsfill_u64 (a | (a - 1)) lowers to open-coded sub + or.
// CHECK-LABEL: test__blsfill_u64
// CHECK: [[TMP:%.*]] = sub i64 %{{.*}}, 1
// CHECK-NEXT: {{.*}} = or i64 %{{.*}}, [[TMP]]
return __blsfill_u64(a);
}
+#endif
unsigned int test__blsic_u32(unsigned int a) {
// CHECK-LABEL: test__blsic_u32
return __blsic_u32(a);
}
+#ifdef __x86_64__
// Codegen test: __blsic_u64 (~a | (a - 1)) lowers to xor-with-minus-one,
// sub, then or.  The original block's final CHECK used [[TMP2]] without a
// defining CHECK line (the sub was missing), which makes FileCheck fail on
// an undefined variable; restore the binding line.
unsigned long long test__blsic_u64(unsigned long long a) {
// CHECK-LABEL: test__blsic_u64
// CHECK: [[TMP1:%.*]] = xor i64 %{{.*}}, -1
// CHECK: [[TMP2:%.*]] = sub i64 %{{.*}}, 1
// CHECK-NEXT: {{.*}} = or i64 [[TMP1]], [[TMP2]]
return __blsic_u64(a);
}
+#endif
unsigned int test__t1mskc_u32(unsigned int a) {
// CHECK-LABEL: test__t1mskc_u32
return __t1mskc_u32(a);
}
+#ifdef __x86_64__
// Codegen test: __t1mskc_u64 (~a | (a + 1)) lowers to xor-with-minus-one,
// add, then or.  The original block's final CHECK used [[TMP2]] without a
// defining CHECK line (the add was missing), which makes FileCheck fail on
// an undefined variable; restore the binding line.
unsigned long long test__t1mskc_u64(unsigned long long a) {
// CHECK-LABEL: test__t1mskc_u64
// CHECK: [[TMP1:%.*]] = xor i64 %{{.*}}, -1
// CHECK: [[TMP2:%.*]] = add i64 %{{.*}}, 1
// CHECK-NEXT: {{.*}} = or i64 [[TMP1]], [[TMP2]]
return __t1mskc_u64(a);
}
+#endif
unsigned int test__tzmsk_u32(unsigned int a) {
// CHECK-LABEL: test__tzmsk_u32
return __tzmsk_u32(a);
}
+#ifdef __x86_64__
// Codegen test: __tzmsk_u64 (~a & (a - 1)) lowers to xor-with-minus-one,
// sub, then and.  The original block's final CHECK used [[TMP2]] without a
// defining CHECK line (the sub was missing), which makes FileCheck fail on
// an undefined variable; restore the binding line.
unsigned long long test__tzmsk_u64(unsigned long long a) {
// CHECK-LABEL: test__tzmsk_u64
// CHECK: [[TMP1:%.*]] = xor i64 %{{.*}}, -1
// CHECK: [[TMP2:%.*]] = sub i64 %{{.*}}, 1
// CHECK-NEXT: {{.*}} = and i64 [[TMP1]], [[TMP2]]
return __tzmsk_u64(a);
}
+#endif