From: Craig Topper
Date: Wed, 23 Nov 2016 01:47:12 +0000 (+0000)
Subject: [X86] Replace valignd/q builtins with appropriate __builtin_shufflevector.
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=5c8cc80b1e276595f78a1a86dd6be1ea4bd8bd63;p=clang

[X86] Replace valignd/q builtins with appropriate __builtin_shufflevector.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@287733 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/include/clang/Basic/BuiltinsX86.def b/include/clang/Basic/BuiltinsX86.def
index 1357f39e87..9b3d942bf9 100644
--- a/include/clang/Basic/BuiltinsX86.def
+++ b/include/clang/Basic/BuiltinsX86.def
@@ -996,12 +996,6 @@ TARGET_BUILTIN(__builtin_ia32_vpermt2vard512_mask, "V16iV16iV16iV16iUs", "", "av
 TARGET_BUILTIN(__builtin_ia32_vpermt2varq512_mask, "V8LLiV8LLiV8LLiV8LLiUc", "", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_vpermt2varps512_mask, "V16fV16iV16fV16fUs", "", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_vpermt2varpd512_mask, "V8dV8LLiV8dV8dUc", "", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_alignq512_mask, "V8LLiV8LLiV8LLiIiV8LLiUc", "", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_alignd512_mask, "V16iV16iV16iIiV16iUs", "", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_alignd128_mask, "V4iV4iV4iIiV4iUc","","avx512vl")
-TARGET_BUILTIN(__builtin_ia32_alignd256_mask, "V8iV8iV8iIiV8iUc","","avx512vl")
-TARGET_BUILTIN(__builtin_ia32_alignq128_mask, "V2LLiV2LLiV2LLiIiV2LLiUc","","avx512vl")
-TARGET_BUILTIN(__builtin_ia32_alignq256_mask, "V4LLiV4LLiV4LLiIiV4LLiUc","","avx512vl")
 TARGET_BUILTIN(__builtin_ia32_gather3div2df, "V2dV2ddC*V2LLiUci","","avx512vl")
 TARGET_BUILTIN(__builtin_ia32_gather3div2di, "V2LLiV2LLiLLiC*V2LLiUci","","avx512vl")

diff --git a/lib/Headers/avx512fintrin.h b/lib/Headers/avx512fintrin.h
index aaaa7eb2d8..86999574c8 100644
--- a/lib/Headers/avx512fintrin.h
+++ b/lib/Headers/avx512fintrin.h
@@ -3416,40 +3416,56 @@ _mm512_maskz_permutex2var_epi64 (__mmask8 __U, __m512i __A,
 }
 
 #define _mm512_alignr_epi64(A, B, I) __extension__ ({ \
-  (__m512i)__builtin_ia32_alignq512_mask((__v8di)(__m512i)(A), \
-                                         (__v8di)(__m512i)(B), (int)(I), \
-                                         (__v8di)_mm512_setzero_si512(), \
-                                         (__mmask8)-1); })
+  (__m512i)__builtin_shufflevector((__v8di)(__m512i)(B), \
+                                   (__v8di)(__m512i)(A), \
+                                   ((int)(I) & 0x7) + 0, \
+                                   ((int)(I) & 0x7) + 1, \
+                                   ((int)(I) & 0x7) + 2, \
+                                   ((int)(I) & 0x7) + 3, \
+                                   ((int)(I) & 0x7) + 4, \
+                                   ((int)(I) & 0x7) + 5, \
+                                   ((int)(I) & 0x7) + 6, \
+                                   ((int)(I) & 0x7) + 7); })
 
 #define _mm512_mask_alignr_epi64(W, U, A, B, imm) __extension__({\
-  (__m512i)__builtin_ia32_alignq512_mask((__v8di)(__m512i)(A), \
-                                         (__v8di)(__m512i)(B), (int)(imm), \
-                                         (__v8di)(__m512i)(W), \
-                                         (__mmask8)(U)); })
+  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
+                   (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
+                   (__v8di)(__m512i)(W)); })
 
 #define _mm512_maskz_alignr_epi64(U, A, B, imm) __extension__({\
-  (__m512i)__builtin_ia32_alignq512_mask((__v8di)(__m512i)(A), \
-                                         (__v8di)(__m512i)(B), (int)(imm), \
-                                         (__v8di)_mm512_setzero_si512(), \
-                                         (__mmask8)(U)); })
+  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
+                   (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
+                   (__v8di)_mm512_setzero_si512()); })
 
 #define _mm512_alignr_epi32(A, B, I) __extension__ ({ \
-  (__m512i)__builtin_ia32_alignd512_mask((__v16si)(__m512i)(A), \
-                                         (__v16si)(__m512i)(B), (int)(I), \
-                                         (__v16si)_mm512_setzero_si512(), \
-                                         (__mmask16)-1); })
+  (__m512i)__builtin_shufflevector((__v16si)(__m512i)(B), \
+                                   (__v16si)(__m512i)(A), \
+                                   ((int)(I) & 0xf) + 0, \
+                                   ((int)(I) & 0xf) + 1, \
+                                   ((int)(I) & 0xf) + 2, \
+                                   ((int)(I) & 0xf) + 3, \
+                                   ((int)(I) & 0xf) + 4, \
+                                   ((int)(I) & 0xf) + 5, \
+                                   ((int)(I) & 0xf) + 6, \
+                                   ((int)(I) & 0xf) + 7, \
+                                   ((int)(I) & 0xf) + 8, \
+                                   ((int)(I) & 0xf) + 9, \
+                                   ((int)(I) & 0xf) + 10, \
+                                   ((int)(I) & 0xf) + 11, \
+                                   ((int)(I) & 0xf) + 12, \
+                                   ((int)(I) & 0xf) + 13, \
+                                   ((int)(I) & 0xf) + 14, \
+                                   ((int)(I) & 0xf) + 15); })
 
 #define _mm512_mask_alignr_epi32(W, U, A, B, imm) __extension__ ({\
-  (__m512i)__builtin_ia32_alignd512_mask((__v16si)(__m512i)(A), \
-                                         (__v16si)(__m512i)(B), (int)(imm), \
-                                         (__v16si)(__m512i)(W), \
-                                         (__mmask16)(U)); })
+  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
+                  (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
+                  (__v16si)(__m512i)(W)); })
 
 #define _mm512_maskz_alignr_epi32(U, A, B, imm) __extension__({\
-  (__m512i)__builtin_ia32_alignd512_mask((__v16si)(__m512i)(A), \
-                                         (__v16si)(__m512i)(B), (int)(imm), \
-                                         (__v16si)_mm512_setzero_si512(), \
-                                         (__mmask16)(U)); })
+  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
+                  (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
+                  (__v16si)_mm512_setzero_si512()); })
 
 /* Vector Extract */
 
 #define _mm512_extractf64x4_pd(A, I) __extension__ ({ \

diff --git a/lib/Headers/avx512vlintrin.h b/lib/Headers/avx512vlintrin.h
index 0f4e1b2f71..0974fc89ad 100644
--- a/lib/Headers/avx512vlintrin.h
+++ b/lib/Headers/avx512vlintrin.h
@@ -8638,76 +8638,78 @@ _mm256_permutexvar_epi32 (__m256i __X, __m256i __Y)
 }
 
 #define _mm_alignr_epi32(A, B, imm) __extension__ ({ \
-  (__m128i)__builtin_ia32_alignd128_mask((__v4si)(__m128i)(A), \
-                                         (__v4si)(__m128i)(B), (int)(imm), \
-                                         (__v4si)_mm_undefined_si128(), \
-                                         (__mmask8)-1); })
+  (__m128i)__builtin_shufflevector((__v4si)(__m128i)(B), \
+                                   (__v4si)(__m128i)(A), \
+                                   ((int)(imm) & 0x3) + 0, \
+                                   ((int)(imm) & 0x3) + 1, \
+                                   ((int)(imm) & 0x3) + 2, \
+                                   ((int)(imm) & 0x3) + 3); })
 
 #define _mm_mask_alignr_epi32(W, U, A, B, imm) __extension__ ({ \
-  (__m128i)__builtin_ia32_alignd128_mask((__v4si)(__m128i)(A), \
-                                         (__v4si)(__m128i)(B), (int)(imm), \
-                                         (__v4si)(__m128i)(W), \
-                                         (__mmask8)(U)); })
+  (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
+                                   (__v4si)_mm_alignr_epi32((A), (B), (imm)), \
+                                   (__v4si)(__m128i)(W)); })
 
 #define _mm_maskz_alignr_epi32(U, A, B, imm) __extension__ ({ \
-  (__m128i)__builtin_ia32_alignd128_mask((__v4si)(__m128i)(A), \
-                                         (__v4si)(__m128i)(B), (int)(imm), \
-                                         (__v4si)_mm_setzero_si128(), \
-                                         (__mmask8)(U)); })
+  (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
+                                   (__v4si)_mm_alignr_epi32((A), (B), (imm)), \
+                                   (__v4si)_mm_setzero_si128()); })
 
 #define _mm256_alignr_epi32(A, B, imm) __extension__ ({ \
-  (__m256i)__builtin_ia32_alignd256_mask((__v8si)(__m256i)(A), \
-                                         (__v8si)(__m256i)(B), (int)(imm), \
-                                         (__v8si)_mm256_undefined_si256(), \
-                                         (__mmask8)-1); })
+  (__m256i)__builtin_shufflevector((__v8si)(__m256i)(B), \
+                                   (__v8si)(__m256i)(A), \
+                                   ((int)(imm) & 0x7) + 0, \
+                                   ((int)(imm) & 0x7) + 1, \
+                                   ((int)(imm) & 0x7) + 2, \
+                                   ((int)(imm) & 0x7) + 3, \
+                                   ((int)(imm) & 0x7) + 4, \
+                                   ((int)(imm) & 0x7) + 5, \
+                                   ((int)(imm) & 0x7) + 6, \
+                                   ((int)(imm) & 0x7) + 7); })
 
 #define _mm256_mask_alignr_epi32(W, U, A, B, imm) __extension__ ({ \
-  (__m256i)__builtin_ia32_alignd256_mask((__v8si)(__m256i)(A), \
-                                         (__v8si)(__m256i)(B), (int)(imm), \
-                                         (__v8si)(__m256i)(W), \
-                                         (__mmask8)(U)); })
+  (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
+                                 (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \
+                                 (__v8si)(__m256i)(W)); })
 
 #define _mm256_maskz_alignr_epi32(U, A, B, imm) __extension__ ({ \
-  (__m256i)__builtin_ia32_alignd256_mask((__v8si)(__m256i)(A), \
-                                         (__v8si)(__m256i)(B), (int)(imm), \
-                                         (__v8si)_mm256_setzero_si256(), \
-                                         (__mmask8)(U)); })
+  (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
+                                 (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \
+                                 (__v8si)_mm256_setzero_si256()); })
 
 #define _mm_alignr_epi64(A, B, imm) __extension__ ({ \
-  (__m128i)__builtin_ia32_alignq128_mask((__v2di)(__m128i)(A), \
-                                         (__v2di)(__m128i)(B), (int)(imm), \
-                                         (__v2di)_mm_setzero_di(), \
-                                         (__mmask8)-1); })
+  (__m128i)__builtin_shufflevector((__v2di)(__m128i)(B), \
+                                   (__v2di)(__m128i)(A), \
+                                   ((int)(imm) & 0x1) + 0, \
+                                   ((int)(imm) & 0x1) + 1); })
 
 #define _mm_mask_alignr_epi64(W, U, A, B, imm) __extension__ ({ \
-  (__m128i)__builtin_ia32_alignq128_mask((__v2di)(__m128i)(A), \
-                                         (__v2di)(__m128i)(B), (int)(imm), \
-                                         (__v2di)(__m128i)(W), \
-                                         (__mmask8)(U)); })
+  (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
+                                   (__v2di)_mm_alignr_epi64((A), (B), (imm)), \
+                                   (__v2di)(__m128i)(W)); })
 
 #define _mm_maskz_alignr_epi64(U, A, B, imm) __extension__ ({ \
-  (__m128i)__builtin_ia32_alignq128_mask((__v2di)(__m128i)(A), \
-                                         (__v2di)(__m128i)(B), (int)(imm), \
-                                         (__v2di)_mm_setzero_di(), \
-                                         (__mmask8)(U)); })
+  (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
+                                   (__v2di)_mm_alignr_epi64((A), (B), (imm)), \
+                                   (__v2di)_mm_setzero_di()); })
 
 #define _mm256_alignr_epi64(A, B, imm) __extension__ ({ \
-  (__m256i)__builtin_ia32_alignq256_mask((__v4di)(__m256i)(A), \
-                                         (__v4di)(__m256i)(B), (int)(imm), \
-                                         (__v4di)_mm256_undefined_pd(), \
-                                         (__mmask8)-1); })
+  (__m256i)__builtin_shufflevector((__v4di)(__m256i)(B), \
+                                   (__v4di)(__m256i)(A), \
+                                   ((int)(imm) & 0x3) + 0, \
+                                   ((int)(imm) & 0x3) + 1, \
+                                   ((int)(imm) & 0x3) + 2, \
+                                   ((int)(imm) & 0x3) + 3); })
 
 #define _mm256_mask_alignr_epi64(W, U, A, B, imm) __extension__ ({ \
-  (__m256i)__builtin_ia32_alignq256_mask((__v4di)(__m256i)(A), \
-                                         (__v4di)(__m256i)(B), (int)(imm), \
-                                         (__v4di)(__m256i)(W), \
-                                         (__mmask8)(U)); })
+  (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
+                                 (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \
+                                 (__v4di)(__m256i)(W)); })
 
 #define _mm256_maskz_alignr_epi64(U, A, B, imm) __extension__ ({ \
-  (__m256i)__builtin_ia32_alignq256_mask((__v4di)(__m256i)(A), \
-                                         (__v4di)(__m256i)(B), (int)(imm), \
-                                         (__v4di)_mm256_setzero_si256(), \
-                                         (__mmask8)(U)); })
+  (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
+                                 (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \
+                                 (__v4di)_mm256_setzero_si256()); })
 
 static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A)

diff --git a/lib/Sema/SemaChecking.cpp b/lib/Sema/SemaChecking.cpp
index 3ae540f53b..843ff43bdb 100644
--- a/lib/Sema/SemaChecking.cpp
+++ b/lib/Sema/SemaChecking.cpp
@@ -2155,12 +2155,6 @@ bool Sema::CheckX86BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
   case X86::BI__builtin_ia32_palignr128:
   case X86::BI__builtin_ia32_palignr256:
   case X86::BI__builtin_ia32_palignr512_mask:
-  case X86::BI__builtin_ia32_alignq512_mask:
-  case X86::BI__builtin_ia32_alignd512_mask:
-  case X86::BI__builtin_ia32_alignd128_mask:
-  case X86::BI__builtin_ia32_alignd256_mask:
-  case X86::BI__builtin_ia32_alignq128_mask:
-  case X86::BI__builtin_ia32_alignq256_mask:
   case X86::BI__builtin_ia32_vcomisd:
   case X86::BI__builtin_ia32_vcomiss:
   case X86::BI__builtin_ia32_shuf_f32x4_mask:

diff --git a/test/CodeGen/avx512f-builtins.c b/test/CodeGen/avx512f-builtins.c
index 13062bb18b..b5a69ddd9d 100644
--- a/test/CodeGen/avx512f-builtins.c
+++ b/test/CodeGen/avx512f-builtins.c
@@ -393,42 +393,46 @@ __mmask16 test_mm512_knot(__mmask16 a)
 
 __m512i test_mm512_alignr_epi32(__m512i a, __m512i b) {
   // CHECK-LABEL: @test_mm512_alignr_epi32
-  // CHECK: @llvm.x86.avx512.mask.valign.d.512
+  // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17>
   return _mm512_alignr_epi32(a, b, 2);
 }
 
 __m512i test_mm512_mask_alignr_epi32(__m512i w, __mmask16 u, __m512i a, __m512i b) {
   // CHECK-LABEL: @test_mm512_mask_alignr_epi32
-  // CHECK: @llvm.x86.avx512.mask.valign.d.512
+  // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17>
+  // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> {{.*}}
   return _mm512_mask_alignr_epi32(w, u, a, b, 2);
 }
 
 __m512i test_mm512_maskz_alignr_epi32( __mmask16 u, __m512i a, __m512i b) {
   // CHECK-LABEL: @test_mm512_maskz_alignr_epi32
-  // CHECK: @llvm.x86.avx512.mask.valign.d.512
+  // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17>
+  // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> {{.*}}
   return _mm512_maskz_alignr_epi32(u, a, b, 2);
 }
 
 __m512i test_mm512_alignr_epi64(__m512i a, __m512i b) {
   // CHECK-LABEL: @test_mm512_alignr_epi64
-  // CHECK: @llvm.x86.avx512.mask.valign.q.512
+  // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
   return _mm512_alignr_epi64(a, b, 2);
 }
 
 __m512i test_mm512_mask_alignr_epi64(__m512i w, __mmask8 u, __m512i a, __m512i b) {
   // CHECK-LABEL: @test_mm512_mask_alignr_epi64
-  // CHECK: @llvm.x86.avx512.mask.valign.q.512
+  // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
+  // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> {{.*}}
   return _mm512_mask_alignr_epi64(w, u, a, b, 2);
 }
 
 __m512i test_mm512_maskz_alignr_epi64( __mmask8 u, __m512i a, __m512i b) {
   // CHECK-LABEL: @test_mm512_maskz_alignr_epi64
-  // CHECK: @llvm.x86.avx512.mask.valign.q.512
+  // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
+  // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> {{.*}}
   return _mm512_maskz_alignr_epi64(u, a, b, 2);
 }

diff --git a/test/CodeGen/avx512vl-builtins.c b/test/CodeGen/avx512vl-builtins.c
index c330357dee..8068fc2aca 100644
--- a/test/CodeGen/avx512vl-builtins.c
+++ b/test/CodeGen/avx512vl-builtins.c
@@ -6934,73 +6934,80 @@ __m256i test_mm256_mask_permutexvar_epi32(__m256i __W, __mmask8 __M, __m256i __X
 
 __m128i test_mm_alignr_epi32(__m128i __A, __m128i __B) {
   // CHECK-LABEL: @test_mm_alignr_epi32
-  // CHECK: @llvm.x86.avx512.mask.valign.d.128
+  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
   return _mm_alignr_epi32(__A, __B, 1);
 }
 
 __m128i test_mm_mask_alignr_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: @test_mm_mask_alignr_epi32
-  // CHECK: @llvm.x86.avx512.mask.valign.d.128
+  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
+  // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
   return _mm_mask_alignr_epi32(__W, __U, __A, __B, 1);
 }
 
 __m128i test_mm_maskz_alignr_epi32(__mmask8 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: @test_mm_maskz_alignr_epi32
-  // CHECK: @llvm.x86.avx512.mask.valign.d.128
+  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
+  // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
   return _mm_maskz_alignr_epi32(__U, __A, __B, 1);
 }
 
 __m256i test_mm256_alignr_epi32(__m256i __A, __m256i __B) {
   // CHECK-LABEL: @test_mm256_alignr_epi32
-  // CHECK: @llvm.x86.avx512.mask.valign.d.256
+  // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
   return _mm256_alignr_epi32(__A, __B, 1);
 }
 
 __m256i test_mm256_mask_alignr_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: @test_mm256_mask_alignr_epi32
-  // CHECK: @llvm.x86.avx512.mask.valign.d.256
+  // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
+  // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
   return _mm256_mask_alignr_epi32(__W, __U, __A, __B, 1);
 }
 
 __m256i test_mm256_maskz_alignr_epi32(__mmask8 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: @test_mm256_maskz_alignr_epi32
-  // CHECK: @llvm.x86.avx512.mask.valign.d.256
+  // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
+  // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
   return _mm256_maskz_alignr_epi32(__U, __A, __B, 1);
 }
 
 __m128i test_mm_alignr_epi64(__m128i __A, __m128i __B) {
   // CHECK-LABEL: @test_mm_alignr_epi64
-  // CHECK: @llvm.x86.avx512.mask.valign.q.128
+  // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> <i32 1, i32 2>
   return _mm_alignr_epi64(__A, __B, 1);
 }
 
 __m128i test_mm_mask_alignr_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: @test_mm_mask_alignr_epi64
-  // CHECK: @llvm.x86.avx512.mask.valign.q.128
+  // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> <i32 1, i32 2>
+  // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
   return _mm_mask_alignr_epi64(__W, __U, __A, __B, 1);
 }
 
 __m128i test_mm_maskz_alignr_epi64(__mmask8 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: @test_mm_maskz_alignr_epi64
-  // CHECK: @llvm.x86.avx512.mask.valign.q.128
+  // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> <i32 1, i32 2>
+  // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
   return _mm_maskz_alignr_epi64(__U, __A, __B, 1);
 }
 
 __m256i test_mm256_alignr_epi64(__m256i __A, __m256i __B) {
   // CHECK-LABEL: @test_mm256_alignr_epi64
-  // CHECK: @llvm.x86.avx512.mask.valign.q.256
+  // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
   return _mm256_alignr_epi64(__A, __B, 1);
 }
 
 __m256i test_mm256_mask_alignr_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: @test_mm256_mask_alignr_epi64
-  // CHECK: @llvm.x86.avx512.mask.valign.q.256
+  // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
+  // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
   return _mm256_mask_alignr_epi64(__W, __U, __A, __B, 1);
 }
 
 __m256i test_mm256_maskz_alignr_epi64(__mmask8 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: @test_mm256_maskz_alignr_epi64
-  // CHECK: @llvm.x86.avx512.mask.valign.q.256
+  // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
   return _mm256_maskz_alignr_epi64(__U, __A, __B, 1);
 }
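
For reference: VALIGND/VALIGNQ concatenate the second source (low lanes) with the first (high lanes) and extract a window of contiguous lanes starting at the immediate, which is exactly what the shufflevector-based macros above express; the masked forms then wrap the shuffle in a per-lane select against the pass-through (or zero) vector. Below is a minimal standalone sketch, not part of the commit, demonstrating those semantics with the rewritten _mm512_alignr_epi64. The element values are arbitrary, and it assumes an AVX-512F-capable machine, compiled with something like `clang -O2 -mavx512f`.

#include <immintrin.h>
#include <stdio.h>

int main(void) {
  /* b supplies the low lanes (values 0..7), a the high lanes (8..15).
     _mm512_set_epi64 takes arguments highest-lane first. */
  __m512i a = _mm512_set_epi64(15, 14, 13, 12, 11, 10, 9, 8);
  __m512i b = _mm512_set_epi64(7, 6, 5, 4, 3, 2, 1, 0);
  long long out[8];

  /* Shift the 16-lane concatenation a:b right by 2 qwords and keep the
     low 8 lanes, i.e. lanes 2..9 of the concatenation -- matching the
     shufflevector indices ((imm & 0x7) + 0..7) in the new macro. */
  __m512i r = _mm512_alignr_epi64(a, b, 2);
  _mm512_storeu_si512((void *)out, r);
  for (int i = 0; i < 8; ++i)
    printf("%lld ", out[i]);   /* prints: 2 3 4 5 6 7 8 9 */
  printf("\n");

  /* The masked form keeps only the lanes selected by the mask bits;
     unselected lanes come from the pass-through operand w (here -1),
     matching the __builtin_ia32_selectq_512 wrapper in the header. */
  __m512i w = _mm512_set1_epi64(-1);
  __m512i m = _mm512_mask_alignr_epi64(w, (__mmask8)0x0F, a, b, 2);
  _mm512_storeu_si512((void *)out, m);
  for (int i = 0; i < 8; ++i)
    printf("%lld ", out[i]);   /* prints: 2 3 4 5 -1 -1 -1 -1 */
  printf("\n");
  return 0;
}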