From 4eee89ed21e179cc1659c3d82c6acf0ce2a9d2c9 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 21 Jun 2016 03:53:24 +0000 Subject: [PATCH] [AVX512] Remove the masked vpcmpeq/vcmpgt intrinsics and autoupgrade them to native icmps. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@273240 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IR/IntrinsicsX86.td | 77 ------- lib/IR/AutoUpgrade.cpp | 30 +++ lib/Target/X86/X86IntrinsicsInfo.h | 24 --- test/CodeGen/X86/avx512-intrinsics-upgrade.ll | 92 +++++++++ test/CodeGen/X86/avx512-intrinsics.ll | 92 --------- .../X86/avx512bw-intrinsics-upgrade.ll | 169 +++++++++++++++ test/CodeGen/X86/avx512bw-intrinsics.ll | 188 +---------------- .../X86/avx512bwvl-intrinsics-upgrade.ll | 93 +++++++++ test/CodeGen/X86/avx512bwvl-intrinsics.ll | 92 --------- .../X86/avx512vl-intrinsics-upgrade.ll | 184 +++++++++++++++++ test/CodeGen/X86/avx512vl-intrinsics.ll | 193 +----------------- 11 files changed, 581 insertions(+), 653 deletions(-) diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td index 547e575c411..cb66c419793 100644 --- a/include/llvm/IR/IntrinsicsX86.td +++ b/include/llvm/IR/IntrinsicsX86.td @@ -6963,31 +6963,6 @@ let TargetPrefix = "x86" in { def int_x86_avx512_vcomi_ss : GCCBuiltin<"__builtin_ia32_vcomiss">, Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_pcmpeq_b_512 : - Intrinsic<[llvm_i64_ty], [llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pcmpeq_w_512 : - Intrinsic<[llvm_i32_ty], [llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pcmpeq_d_512 : - Intrinsic<[llvm_i16_ty], [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pcmpeq_q_512 : - Intrinsic<[llvm_i8_ty], [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], - [IntrNoMem]>; - - def int_x86_avx512_mask_pcmpgt_b_512: - Intrinsic<[llvm_i64_ty], 
[llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pcmpgt_w_512: - Intrinsic<[llvm_i32_ty], [llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pcmpgt_d_512: - Intrinsic<[llvm_i16_ty], [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pcmpgt_q_512: - Intrinsic<[llvm_i8_ty], [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], - [IntrNoMem]>; def int_x86_avx512_mask_cmp_b_512: GCCBuiltin<"__builtin_ia32_cmpb512_mask">, Intrinsic<[llvm_i64_ty], [llvm_v64i8_ty, llvm_v64i8_ty, llvm_i32_ty, @@ -7016,32 +6991,6 @@ let TargetPrefix = "x86" in { llvm_i8_ty], [IntrNoMem]>; // 256-bit - def int_x86_avx512_mask_pcmpeq_b_256 : - Intrinsic<[llvm_i32_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pcmpeq_w_256 : - Intrinsic<[llvm_i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pcmpeq_d_256 : - Intrinsic<[llvm_i8_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pcmpeq_q_256 : - Intrinsic<[llvm_i8_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], - [IntrNoMem]>; - - def int_x86_avx512_mask_pcmpgt_b_256: - Intrinsic<[llvm_i32_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pcmpgt_w_256: - Intrinsic<[llvm_i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pcmpgt_d_256: - Intrinsic<[llvm_i8_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pcmpgt_q_256: - Intrinsic<[llvm_i8_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_cmp_b_256: GCCBuiltin<"__builtin_ia32_cmpb256_mask">, Intrinsic<[llvm_i32_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; @@ -7069,32 +7018,6 @@ let TargetPrefix = "x86" in { llvm_i8_ty], [IntrNoMem]>; // 128-bit - def 
int_x86_avx512_mask_pcmpeq_b_128 : - Intrinsic<[llvm_i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pcmpeq_w_128 : - Intrinsic<[llvm_i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pcmpeq_d_128 : - Intrinsic<[llvm_i8_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pcmpeq_q_128 : - Intrinsic<[llvm_i8_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], - [IntrNoMem]>; - - def int_x86_avx512_mask_pcmpgt_b_128: - Intrinsic<[llvm_i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pcmpgt_w_128: - Intrinsic<[llvm_i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pcmpgt_d_128: - Intrinsic<[llvm_i8_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pcmpgt_q_128: - Intrinsic<[llvm_i8_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_cmp_b_128: GCCBuiltin<"__builtin_ia32_cmpb128_mask">, Intrinsic<[llvm_i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty, llvm_i16_ty], [IntrNoMem]>; diff --git a/lib/IR/AutoUpgrade.cpp b/lib/IR/AutoUpgrade.cpp index 733eb85cb5b..a4fc03cf029 100644 --- a/lib/IR/AutoUpgrade.cpp +++ b/lib/IR/AutoUpgrade.cpp @@ -174,6 +174,8 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { Name.startswith("x86.sse2.pcmpgt.") || Name.startswith("x86.avx2.pcmpeq.") || Name.startswith("x86.avx2.pcmpgt.") || + Name.startswith("x86.avx512.mask.pcmpeq.") || + Name.startswith("x86.avx512.mask.pcmpgt.") || Name == "x86.sse41.pmaxsb" || Name == "x86.sse2.pmaxs.w" || Name == "x86.sse41.pmaxsd" || @@ -541,6 +543,30 @@ static Value *upgradeIntMinMax(IRBuilder<> &Builder, CallInst &CI, return Builder.CreateSelect(Cmp, Op0, Op1); } +static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI, + ICmpInst::Predicate Pred) { + Value *Op0 = 
CI.getArgOperand(0); + unsigned NumElts = Op0->getType()->getVectorNumElements(); + Value *Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1)); + + Value *Mask = CI.getArgOperand(2); + const auto *C = dyn_cast<Constant>(Mask); + if (!C || !C->isAllOnesValue()) + Cmp = Builder.CreateAnd(Cmp, getX86MaskVec(Builder, Mask, NumElts)); + + if (NumElts < 8) { + uint32_t Indices[8]; + for (unsigned i = 0; i != NumElts; ++i) + Indices[i] = i; + for (unsigned i = NumElts; i != 8; ++i) + Indices[i] = NumElts; + Cmp = Builder.CreateShuffleVector(Cmp, UndefValue::get(Cmp->getType()), + Indices); + } + return Builder.CreateBitCast(Cmp, IntegerType::get(CI.getContext(), + std::max(NumElts, 8U))); +} + /// Upgrade a call to an old intrinsic. All argument and return casting must be /// provided to seamlessly integrate with existing context. void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { @@ -567,6 +593,10 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { Rep = Builder.CreateICmpSGT(CI->getArgOperand(0), CI->getArgOperand(1), "pcmpgt"); Rep = Builder.CreateSExt(Rep, CI->getType(), ""); + } else if (Name.startswith("llvm.x86.avx512.mask.pcmpeq.")) { + Rep = upgradeMaskedCompare(Builder, *CI, ICmpInst::ICMP_EQ); + } else if (Name.startswith("llvm.x86.avx512.mask.pcmpgt.")) { + Rep = upgradeMaskedCompare(Builder, *CI, ICmpInst::ICMP_SGT); } else if (Name == "llvm.x86.sse41.pmaxsb" || Name == "llvm.x86.sse2.pmaxs.w" || Name == "llvm.x86.sse41.pmaxsd" || diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h index 971f54f607c..071709a1a73 100644 --- a/lib/Target/X86/X86IntrinsicsInfo.h +++ b/lib/Target/X86/X86IntrinsicsInfo.h @@ -948,30 +948,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86ISD::VBROADCAST, 0), X86_INTRINSIC_DATA(avx512_mask_pbroadcast_w_gpr_512, INTR_TYPE_1OP_MASK, X86ISD::VBROADCAST, 0), - X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_128, CMP_MASK, X86ISD::PCMPEQM, 0), - 
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_256, CMP_MASK, X86ISD::PCMPEQM, 0), - X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_512, CMP_MASK, X86ISD::PCMPEQM, 0), - X86_INTRINSIC_DATA(avx512_mask_pcmpeq_d_128, CMP_MASK, X86ISD::PCMPEQM, 0), - X86_INTRINSIC_DATA(avx512_mask_pcmpeq_d_256, CMP_MASK, X86ISD::PCMPEQM, 0), - X86_INTRINSIC_DATA(avx512_mask_pcmpeq_d_512, CMP_MASK, X86ISD::PCMPEQM, 0), - X86_INTRINSIC_DATA(avx512_mask_pcmpeq_q_128, CMP_MASK, X86ISD::PCMPEQM, 0), - X86_INTRINSIC_DATA(avx512_mask_pcmpeq_q_256, CMP_MASK, X86ISD::PCMPEQM, 0), - X86_INTRINSIC_DATA(avx512_mask_pcmpeq_q_512, CMP_MASK, X86ISD::PCMPEQM, 0), - X86_INTRINSIC_DATA(avx512_mask_pcmpeq_w_128, CMP_MASK, X86ISD::PCMPEQM, 0), - X86_INTRINSIC_DATA(avx512_mask_pcmpeq_w_256, CMP_MASK, X86ISD::PCMPEQM, 0), - X86_INTRINSIC_DATA(avx512_mask_pcmpeq_w_512, CMP_MASK, X86ISD::PCMPEQM, 0), - X86_INTRINSIC_DATA(avx512_mask_pcmpgt_b_128, CMP_MASK, X86ISD::PCMPGTM, 0), - X86_INTRINSIC_DATA(avx512_mask_pcmpgt_b_256, CMP_MASK, X86ISD::PCMPGTM, 0), - X86_INTRINSIC_DATA(avx512_mask_pcmpgt_b_512, CMP_MASK, X86ISD::PCMPGTM, 0), - X86_INTRINSIC_DATA(avx512_mask_pcmpgt_d_128, CMP_MASK, X86ISD::PCMPGTM, 0), - X86_INTRINSIC_DATA(avx512_mask_pcmpgt_d_256, CMP_MASK, X86ISD::PCMPGTM, 0), - X86_INTRINSIC_DATA(avx512_mask_pcmpgt_d_512, CMP_MASK, X86ISD::PCMPGTM, 0), - X86_INTRINSIC_DATA(avx512_mask_pcmpgt_q_128, CMP_MASK, X86ISD::PCMPGTM, 0), - X86_INTRINSIC_DATA(avx512_mask_pcmpgt_q_256, CMP_MASK, X86ISD::PCMPGTM, 0), - X86_INTRINSIC_DATA(avx512_mask_pcmpgt_q_512, CMP_MASK, X86ISD::PCMPGTM, 0), - X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_128, CMP_MASK, X86ISD::PCMPGTM, 0), - X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_256, CMP_MASK, X86ISD::PCMPGTM, 0), - X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_512, CMP_MASK, X86ISD::PCMPGTM, 0), X86_INTRINSIC_DATA(avx512_mask_perm_df_256, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VPERMI, 0), X86_INTRINSIC_DATA(avx512_mask_perm_df_512, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VPERMI, 0), 
X86_INTRINSIC_DATA(avx512_mask_perm_di_256, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VPERMI, 0), diff --git a/test/CodeGen/X86/avx512-intrinsics-upgrade.ll b/test/CodeGen/X86/avx512-intrinsics-upgrade.ll index 50b9c712ac8..d8ebd5538ee 100644 --- a/test/CodeGen/X86/avx512-intrinsics-upgrade.ll +++ b/test/CodeGen/X86/avx512-intrinsics-upgrade.ll @@ -276,3 +276,95 @@ define <16 x i32>@test_int_x86_avx512_mask_pshuf_d_512(<16 x i32> %x0, i32 %x1, %res4 = add <16 x i32> %res3, %res2 ret <16 x i32> %res4 } + +define i16 @test_pcmpeq_d(<16 x i32> %a, <16 x i32> %b) { +; CHECK-LABEL: test_pcmpeq_d: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 +; CHECK-NEXT: kmovw %k0, %eax +; CHECK-NEXT: retq + %res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1) + ret i16 %res +} + +define i16 @test_mask_pcmpeq_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) { +; CHECK-LABEL: test_mask_pcmpeq_d: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} +; CHECK-NEXT: kmovw %k0, %eax +; CHECK-NEXT: retq + %res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask) + ret i16 %res +} + +declare i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32>, <16 x i32>, i16) + +define i8 @test_pcmpeq_q(<8 x i64> %a, <8 x i64> %b) { +; CHECK-LABEL: test_pcmpeq_q: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; CHECK-NEXT: kmovw %k0, %eax +; CHECK-NEXT: retq + %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1) + ret i8 %res +} + +define i8 @test_mask_pcmpeq_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) { +; CHECK-LABEL: test_mask_pcmpeq_q: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} +; CHECK-NEXT: kmovw %k0, %eax +; CHECK-NEXT: retq + %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask) + ret i8 %res +} + +declare i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64>, <8 x 
i64>, i8) + +define i16 @test_pcmpgt_d(<16 x i32> %a, <16 x i32> %b) { +; CHECK-LABEL: test_pcmpgt_d: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 +; CHECK-NEXT: kmovw %k0, %eax +; CHECK-NEXT: retq + %res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1) + ret i16 %res +} + +define i16 @test_mask_pcmpgt_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) { +; CHECK-LABEL: test_mask_pcmpgt_d: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} +; CHECK-NEXT: kmovw %k0, %eax +; CHECK-NEXT: retq + %res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask) + ret i16 %res +} + +declare i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32>, <16 x i32>, i16) + +define i8 @test_pcmpgt_q(<8 x i64> %a, <8 x i64> %b) { +; CHECK-LABEL: test_pcmpgt_q: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; CHECK-NEXT: kmovw %k0, %eax +; CHECK-NEXT: retq + %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1) + ret i8 %res +} + +define i8 @test_mask_pcmpgt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) { +; CHECK-LABEL: test_mask_pcmpgt_q: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} +; CHECK-NEXT: kmovw %k0, %eax +; CHECK-NEXT: retq + %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask) + ret i8 %res +} + +declare i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64>, <8 x i64>, i8) diff --git a/test/CodeGen/X86/avx512-intrinsics.ll b/test/CodeGen/X86/avx512-intrinsics.ll index 22dd7ee5887..d44a0fdc01a 100644 --- a/test/CodeGen/X86/avx512-intrinsics.ll +++ b/test/CodeGen/X86/avx512-intrinsics.ll @@ -970,98 +970,6 @@ define void @test_mask_store_ss(i8* %ptr, <4 x float> %data, i8 %mask) { declare void @llvm.x86.avx512.mask.store.ss(i8*, <4 x float>, i8 ) -define i16 @test_pcmpeq_d(<16 x i32> %a, <16 x i32> %b) { -; CHECK-LABEL: test_pcmpeq_d: -; CHECK: 
## BB#0: -; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq - %res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1) - ret i16 %res -} - -define i16 @test_mask_pcmpeq_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) { -; CHECK-LABEL: test_mask_pcmpeq_d: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq - %res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask) - ret i16 %res -} - -declare i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32>, <16 x i32>, i16) - -define i8 @test_pcmpeq_q(<8 x i64> %a, <8 x i64> %b) { -; CHECK-LABEL: test_pcmpeq_q: -; CHECK: ## BB#0: -; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq - %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1) - ret i8 %res -} - -define i8 @test_mask_pcmpeq_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) { -; CHECK-LABEL: test_mask_pcmpeq_q: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq - %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask) - ret i8 %res -} - -declare i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64>, <8 x i64>, i8) - -define i16 @test_pcmpgt_d(<16 x i32> %a, <16 x i32> %b) { -; CHECK-LABEL: test_pcmpgt_d: -; CHECK: ## BB#0: -; CHECK-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq - %res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1) - ret i16 %res -} - -define i16 @test_mask_pcmpgt_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) { -; CHECK-LABEL: test_mask_pcmpgt_d: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq - %res = call 
i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask) - ret i16 %res -} - -declare i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32>, <16 x i32>, i16) - -define i8 @test_pcmpgt_q(<8 x i64> %a, <8 x i64> %b) { -; CHECK-LABEL: test_pcmpgt_q: -; CHECK: ## BB#0: -; CHECK-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq - %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1) - ret i8 %res -} - -define i8 @test_mask_pcmpgt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) { -; CHECK-LABEL: test_mask_pcmpgt_q: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq - %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask) - ret i8 %res -} - -declare i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64>, <8 x i64>, i8) - define <8 x i16> @test_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1) { ; CHECK-LABEL: test_cmp_d_512: ; CHECK: ## BB#0: diff --git a/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll b/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll index ecd65908186..9c70139ce59 100644 --- a/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll +++ b/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll @@ -271,3 +271,172 @@ define <32 x i16>@test_int_x86_avx512_mask_pshufl_w_512(<32 x i16> %x0, i32 %x1, %res4 = add <32 x i16> %res3, %res2 ret <32 x i16> %res4 } + +define i64 @test_pcmpeq_b(<64 x i8> %a, <64 x i8> %b) { +; AVX512BW-LABEL: test_pcmpeq_b: +; AVX512BW: ## BB#0: +; AVX512BW-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 +; AVX512BW-NEXT: kmovq %k0, %rax +; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_pcmpeq_b: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: subl $12, %esp +; AVX512F-32-NEXT: .Ltmp0: +; AVX512F-32-NEXT: .cfi_def_cfa_offset 16 +; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 +; AVX512F-32-NEXT: kmovq %k0, (%esp) +; AVX512F-32-NEXT: movl (%esp), %eax +; AVX512F-32-NEXT: movl 
{{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: addl $12, %esp +; AVX512F-32-NEXT: retl + %res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1) + ret i64 %res +} + +define i64 @test_mask_pcmpeq_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) { +; AVX512BW-LABEL: test_mask_pcmpeq_b: +; AVX512BW: ## BB#0: +; AVX512BW-NEXT: kmovq %rdi, %k1 +; AVX512BW-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} +; AVX512BW-NEXT: kmovq %k0, %rax +; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_mask_pcmpeq_b: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: subl $12, %esp +; AVX512F-32-NEXT: .Ltmp1: +; AVX512F-32-NEXT: .cfi_def_cfa_offset 16 +; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} +; AVX512F-32-NEXT: kmovq %k0, (%esp) +; AVX512F-32-NEXT: movl (%esp), %eax +; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: addl $12, %esp +; AVX512F-32-NEXT: retl + %res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 %mask) + ret i64 %res +} + +declare i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8>, <64 x i8>, i64) + +define i32 @test_pcmpeq_w(<32 x i16> %a, <32 x i16> %b) { +; AVX512BW-LABEL: test_pcmpeq_w: +; AVX512BW: ## BB#0: +; AVX512BW-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 +; AVX512BW-NEXT: kmovd %k0, %eax +; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_pcmpeq_w: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 +; AVX512F-32-NEXT: kmovd %k0, %eax +; AVX512F-32-NEXT: retl + %res = call i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16> %a, <32 x i16> %b, i32 -1) + ret i32 %res +} + +define i32 @test_mask_pcmpeq_w(<32 x i16> %a, <32 x i16> %b, i32 %mask) { +; AVX512BW-LABEL: test_mask_pcmpeq_w: +; AVX512BW: ## BB#0: +; AVX512BW-NEXT: kmovd %edi, %k1 +; AVX512BW-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1} +; AVX512BW-NEXT: kmovd %k0, %eax +; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_mask_pcmpeq_w: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd 
{{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1} +; AVX512F-32-NEXT: kmovd %k0, %eax +; AVX512F-32-NEXT: retl + %res = call i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16> %a, <32 x i16> %b, i32 %mask) + ret i32 %res +} + +declare i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16>, <32 x i16>, i32) + +define i64 @test_pcmpgt_b(<64 x i8> %a, <64 x i8> %b) { +; AVX512BW-LABEL: test_pcmpgt_b: +; AVX512BW: ## BB#0: +; AVX512BW-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 +; AVX512BW-NEXT: kmovq %k0, %rax +; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_pcmpgt_b: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: subl $12, %esp +; AVX512F-32-NEXT: .Ltmp2: +; AVX512F-32-NEXT: .cfi_def_cfa_offset 16 +; AVX512F-32-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 +; AVX512F-32-NEXT: kmovq %k0, (%esp) +; AVX512F-32-NEXT: movl (%esp), %eax +; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: addl $12, %esp +; AVX512F-32-NEXT: retl + %res = call i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1) + ret i64 %res +} + +define i64 @test_mask_pcmpgt_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) { +; AVX512BW-LABEL: test_mask_pcmpgt_b: +; AVX512BW: ## BB#0: +; AVX512BW-NEXT: kmovq %rdi, %k1 +; AVX512BW-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 {%k1} +; AVX512BW-NEXT: kmovq %k0, %rax +; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_mask_pcmpgt_b: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: subl $12, %esp +; AVX512F-32-NEXT: .Ltmp3: +; AVX512F-32-NEXT: .cfi_def_cfa_offset 16 +; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 {%k1} +; AVX512F-32-NEXT: kmovq %k0, (%esp) +; AVX512F-32-NEXT: movl (%esp), %eax +; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: addl $12, %esp +; AVX512F-32-NEXT: retl + %res = call i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8> %a, <64 x i8> %b, i64 %mask) + ret i64 %res +} + +declare i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8>, <64 x i8>, i64) + +define i32 
@test_pcmpgt_w(<32 x i16> %a, <32 x i16> %b) { +; AVX512BW-LABEL: test_pcmpgt_w: +; AVX512BW: ## BB#0: +; AVX512BW-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 +; AVX512BW-NEXT: kmovd %k0, %eax +; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_pcmpgt_w: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 +; AVX512F-32-NEXT: kmovd %k0, %eax +; AVX512F-32-NEXT: retl + %res = call i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16> %a, <32 x i16> %b, i32 -1) + ret i32 %res +} + +define i32 @test_mask_pcmpgt_w(<32 x i16> %a, <32 x i16> %b, i32 %mask) { +; AVX512BW-LABEL: test_mask_pcmpgt_w: +; AVX512BW: ## BB#0: +; AVX512BW-NEXT: kmovd %edi, %k1 +; AVX512BW-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 {%k1} +; AVX512BW-NEXT: kmovd %k0, %eax +; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_mask_pcmpgt_w: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 {%k1} +; AVX512F-32-NEXT: kmovd %k0, %eax +; AVX512F-32-NEXT: retl + %res = call i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16> %a, <32 x i16> %b, i32 %mask) + ret i32 %res +} + +declare i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16>, <32 x i16>, i32) + diff --git a/test/CodeGen/X86/avx512bw-intrinsics.ll b/test/CodeGen/X86/avx512bw-intrinsics.ll index 3d3bd29e7f0..b5e586e2bda 100644 --- a/test/CodeGen/X86/avx512bw-intrinsics.ll +++ b/test/CodeGen/X86/avx512bw-intrinsics.ll @@ -2,178 +2,6 @@ ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512BW ; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512F-32 -define i64 @test_pcmpeq_b(<64 x i8> %a, <64 x i8> %b) { -; AVX512BW-LABEL: test_pcmpeq_b: -; AVX512BW: ## BB#0: -; AVX512BW-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 -; AVX512BW-NEXT: kmovq %k0, %rax -; AVX512BW-NEXT: retq -; -; AVX512F-32-LABEL: test_pcmpeq_b: -; AVX512F-32: # BB#0: -; AVX512F-32-NEXT: subl 
$12, %esp -; AVX512F-32-NEXT: .Ltmp0: -; AVX512F-32-NEXT: .cfi_def_cfa_offset 16 -; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 -; AVX512F-32-NEXT: kmovq %k0, (%esp) -; AVX512F-32-NEXT: movl (%esp), %eax -; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx -; AVX512F-32-NEXT: addl $12, %esp -; AVX512F-32-NEXT: retl - %res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1) - ret i64 %res -} - -define i64 @test_mask_pcmpeq_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) { -; AVX512BW-LABEL: test_mask_pcmpeq_b: -; AVX512BW: ## BB#0: -; AVX512BW-NEXT: kmovq %rdi, %k1 -; AVX512BW-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} -; AVX512BW-NEXT: kmovq %k0, %rax -; AVX512BW-NEXT: retq -; -; AVX512F-32-LABEL: test_mask_pcmpeq_b: -; AVX512F-32: # BB#0: -; AVX512F-32-NEXT: subl $12, %esp -; AVX512F-32-NEXT: .Ltmp1: -; AVX512F-32-NEXT: .cfi_def_cfa_offset 16 -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 -; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} -; AVX512F-32-NEXT: kmovq %k0, (%esp) -; AVX512F-32-NEXT: movl (%esp), %eax -; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx -; AVX512F-32-NEXT: addl $12, %esp -; AVX512F-32-NEXT: retl - %res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 %mask) - ret i64 %res -} - -declare i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8>, <64 x i8>, i64) - -define i32 @test_pcmpeq_w(<32 x i16> %a, <32 x i16> %b) { -; AVX512BW-LABEL: test_pcmpeq_w: -; AVX512BW: ## BB#0: -; AVX512BW-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 -; AVX512BW-NEXT: kmovd %k0, %eax -; AVX512BW-NEXT: retq -; -; AVX512F-32-LABEL: test_pcmpeq_w: -; AVX512F-32: # BB#0: -; AVX512F-32-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 -; AVX512F-32-NEXT: kmovd %k0, %eax -; AVX512F-32-NEXT: retl - %res = call i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16> %a, <32 x i16> %b, i32 -1) - ret i32 %res -} - -define i32 @test_mask_pcmpeq_w(<32 x i16> %a, <32 x i16> %b, i32 
%mask) { -; AVX512BW-LABEL: test_mask_pcmpeq_w: -; AVX512BW: ## BB#0: -; AVX512BW-NEXT: kmovd %edi, %k1 -; AVX512BW-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1} -; AVX512BW-NEXT: kmovd %k0, %eax -; AVX512BW-NEXT: retq -; -; AVX512F-32-LABEL: test_mask_pcmpeq_w: -; AVX512F-32: # BB#0: -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1} -; AVX512F-32-NEXT: kmovd %k0, %eax -; AVX512F-32-NEXT: retl - %res = call i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16> %a, <32 x i16> %b, i32 %mask) - ret i32 %res -} - -declare i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16>, <32 x i16>, i32) - -define i64 @test_pcmpgt_b(<64 x i8> %a, <64 x i8> %b) { -; AVX512BW-LABEL: test_pcmpgt_b: -; AVX512BW: ## BB#0: -; AVX512BW-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 -; AVX512BW-NEXT: kmovq %k0, %rax -; AVX512BW-NEXT: retq -; -; AVX512F-32-LABEL: test_pcmpgt_b: -; AVX512F-32: # BB#0: -; AVX512F-32-NEXT: subl $12, %esp -; AVX512F-32-NEXT: .Ltmp2: -; AVX512F-32-NEXT: .cfi_def_cfa_offset 16 -; AVX512F-32-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 -; AVX512F-32-NEXT: kmovq %k0, (%esp) -; AVX512F-32-NEXT: movl (%esp), %eax -; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx -; AVX512F-32-NEXT: addl $12, %esp -; AVX512F-32-NEXT: retl - %res = call i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1) - ret i64 %res -} - -define i64 @test_mask_pcmpgt_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) { -; AVX512BW-LABEL: test_mask_pcmpgt_b: -; AVX512BW: ## BB#0: -; AVX512BW-NEXT: kmovq %rdi, %k1 -; AVX512BW-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 {%k1} -; AVX512BW-NEXT: kmovq %k0, %rax -; AVX512BW-NEXT: retq -; -; AVX512F-32-LABEL: test_mask_pcmpgt_b: -; AVX512F-32: # BB#0: -; AVX512F-32-NEXT: subl $12, %esp -; AVX512F-32-NEXT: .Ltmp3: -; AVX512F-32-NEXT: .cfi_def_cfa_offset 16 -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 -; AVX512F-32-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 {%k1} 
-; AVX512F-32-NEXT: kmovq %k0, (%esp) -; AVX512F-32-NEXT: movl (%esp), %eax -; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx -; AVX512F-32-NEXT: addl $12, %esp -; AVX512F-32-NEXT: retl - %res = call i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8> %a, <64 x i8> %b, i64 %mask) - ret i64 %res -} - -declare i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8>, <64 x i8>, i64) - -define i32 @test_pcmpgt_w(<32 x i16> %a, <32 x i16> %b) { -; AVX512BW-LABEL: test_pcmpgt_w: -; AVX512BW: ## BB#0: -; AVX512BW-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 -; AVX512BW-NEXT: kmovd %k0, %eax -; AVX512BW-NEXT: retq -; -; AVX512F-32-LABEL: test_pcmpgt_w: -; AVX512F-32: # BB#0: -; AVX512F-32-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 -; AVX512F-32-NEXT: kmovd %k0, %eax -; AVX512F-32-NEXT: retl - %res = call i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16> %a, <32 x i16> %b, i32 -1) - ret i32 %res -} - -define i32 @test_mask_pcmpgt_w(<32 x i16> %a, <32 x i16> %b, i32 %mask) { -; AVX512BW-LABEL: test_mask_pcmpgt_w: -; AVX512BW: ## BB#0: -; AVX512BW-NEXT: kmovd %edi, %k1 -; AVX512BW-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 {%k1} -; AVX512BW-NEXT: kmovd %k0, %eax -; AVX512BW-NEXT: retq -; -; AVX512F-32-LABEL: test_mask_pcmpgt_w: -; AVX512F-32: # BB#0: -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 {%k1} -; AVX512F-32-NEXT: kmovd %k0, %eax -; AVX512F-32-NEXT: retl - %res = call i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16> %a, <32 x i16> %b, i32 %mask) - ret i32 %res -} - -declare i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16>, <32 x i16>, i32) - define i64 @test_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1) { ; AVX512BW-LABEL: test_cmp_b_512: ; AVX512BW: ## BB#0: @@ -205,7 +33,7 @@ define i64 @test_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1) { ; AVX512F-32-LABEL: test_cmp_b_512: ; AVX512F-32: # BB#0: ; AVX512F-32-NEXT: subl $68, %esp -; AVX512F-32-NEXT: .Ltmp4: +; AVX512F-32-NEXT: .Ltmp0: ; AVX512F-32-NEXT: .cfi_def_cfa_offset 72 ; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) @@ -291,7 +119,7 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) { ; AVX512F-32-LABEL: test_mask_cmp_b_512: ; AVX512F-32: # BB#0: ; AVX512F-32-NEXT: subl $68, %esp -; AVX512F-32-NEXT: .Ltmp5: +; AVX512F-32-NEXT: .Ltmp1: ; AVX512F-32-NEXT: .cfi_def_cfa_offset 72 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 @@ -381,7 +209,7 @@ define i64 @test_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1) { ; AVX512F-32-LABEL: test_ucmp_b_512: ; AVX512F-32: # BB#0: ; AVX512F-32-NEXT: subl $68, %esp -; AVX512F-32-NEXT: .Ltmp6: +; AVX512F-32-NEXT: .Ltmp2: ; AVX512F-32-NEXT: .cfi_def_cfa_offset 72 ; AVX512F-32-NEXT: vpcmpequb %zmm1, %zmm0, %k0 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) @@ -467,7 +295,7 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m ; AVX512F-32-LABEL: test_mask_x86_avx512_ucmp_b_512: ; AVX512F-32: # BB#0: ; AVX512F-32-NEXT: subl $68, %esp -; AVX512F-32-NEXT: .Ltmp7: +; AVX512F-32-NEXT: .Ltmp3: ; AVX512F-32-NEXT: .cfi_def_cfa_offset 72 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 @@ -2661,7 +2489,7 @@ define i64@test_int_x86_avx512_kunpck_qd(i64 %x0, i64 %x1) { ; AVX512F-32-LABEL: test_int_x86_avx512_kunpck_qd: ; AVX512F-32: # BB#0: ; AVX512F-32-NEXT: subl $12, %esp -; AVX512F-32-NEXT: .Ltmp8: +; AVX512F-32-NEXT: .Ltmp4: ; AVX512F-32-NEXT: .cfi_def_cfa_offset 16 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 @@ -2687,7 +2515,7 @@ define i64@test_int_x86_avx512_cvtb2mask_512(<64 x i8> %x0) { ; AVX512F-32-LABEL: test_int_x86_avx512_cvtb2mask_512: ; AVX512F-32: # BB#0: ; AVX512F-32-NEXT: subl $12, %esp -; AVX512F-32-NEXT: .Ltmp9: +; AVX512F-32-NEXT: .Ltmp5: ; AVX512F-32-NEXT: .cfi_def_cfa_offset 16 ; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 ; AVX512F-32-NEXT: kmovq %k0, (%esp) @@ -3149,7 +2977,7 @@ define 
i64@test_int_x86_avx512_ptestm_b_512(<64 x i8> %x0, <64 x i8> %x1, i64 %x ; AVX512F-32-LABEL: test_int_x86_avx512_ptestm_b_512: ; AVX512F-32: # BB#0: ; AVX512F-32-NEXT: subl $20, %esp -; AVX512F-32-NEXT: .Ltmp10: +; AVX512F-32-NEXT: .Ltmp6: ; AVX512F-32-NEXT: .cfi_def_cfa_offset 24 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 @@ -3214,7 +3042,7 @@ define i64@test_int_x86_avx512_ptestnm_b_512(<64 x i8> %x0, <64 x i8> %x1, i64 % ; AVX512F-32-LABEL: test_int_x86_avx512_ptestnm_b_512: ; AVX512F-32: # BB#0: ; AVX512F-32-NEXT: subl $20, %esp -; AVX512F-32-NEXT: .Ltmp11: +; AVX512F-32-NEXT: .Ltmp7: ; AVX512F-32-NEXT: .cfi_def_cfa_offset 24 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 diff --git a/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll b/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll index 811c063333d..6520805262c 100644 --- a/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll +++ b/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll @@ -266,3 +266,96 @@ define <16 x i16>@test_int_x86_avx512_mask_pshufl_w_256(<16 x i16> %x0, i32 %x1, %res4 = add <16 x i16> %res3, %res2 ret <16 x i16> %res4 } + +define i32 @test_pcmpeq_b_256(<32 x i8> %a, <32 x i8> %b) { +; CHECK-LABEL: test_pcmpeq_b_256: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1] +; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call i32 @llvm.x86.avx512.mask.pcmpeq.b.256(<32 x i8> %a, <32 x i8> %b, i32 -1) + ret i32 %res +} + +define i32 @test_mask_pcmpeq_b_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) { +; CHECK-LABEL: test_mask_pcmpeq_b_256: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] +; CHECK-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x74,0xc1] +; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] +; 
CHECK-NEXT: retq ## encoding: [0xc3] + %res = call i32 @llvm.x86.avx512.mask.pcmpeq.b.256(<32 x i8> %a, <32 x i8> %b, i32 %mask) + ret i32 %res +} + +declare i32 @llvm.x86.avx512.mask.pcmpeq.b.256(<32 x i8>, <32 x i8>, i32) + +define i16 @test_pcmpeq_w_256(<16 x i16> %a, <16 x i16> %b) { +; CHECK-LABEL: test_pcmpeq_w_256: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x75,0xc1] +; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call i16 @llvm.x86.avx512.mask.pcmpeq.w.256(<16 x i16> %a, <16 x i16> %b, i16 -1) + ret i16 %res +} + +define i16 @test_mask_pcmpeq_w_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) { +; CHECK-LABEL: test_mask_pcmpeq_w_256: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x75,0xc1] +; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call i16 @llvm.x86.avx512.mask.pcmpeq.w.256(<16 x i16> %a, <16 x i16> %b, i16 %mask) + ret i16 %res +} + +declare i16 @llvm.x86.avx512.mask.pcmpeq.w.256(<16 x i16>, <16 x i16>, i16) + +define i32 @test_pcmpgt_b_256(<32 x i8> %a, <32 x i8> %b) { +; CHECK-LABEL: test_pcmpgt_b_256: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x64,0xc1] +; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call i32 @llvm.x86.avx512.mask.pcmpgt.b.256(<32 x i8> %a, <32 x i8> %b, i32 -1) + ret i32 %res +} + +define i32 @test_mask_pcmpgt_b_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) { +; CHECK-LABEL: test_mask_pcmpgt_b_256: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] +; CHECK-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x64,0xc1] +; CHECK-NEXT: kmovd %k0, %eax ## encoding: 
[0xc5,0xfb,0x93,0xc0] +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call i32 @llvm.x86.avx512.mask.pcmpgt.b.256(<32 x i8> %a, <32 x i8> %b, i32 %mask) + ret i32 %res +} + +declare i32 @llvm.x86.avx512.mask.pcmpgt.b.256(<32 x i8>, <32 x i8>, i32) + +define i16 @test_pcmpgt_w_256(<16 x i16> %a, <16 x i16> %b) { +; CHECK-LABEL: test_pcmpgt_w_256: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x65,0xc1] +; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call i16 @llvm.x86.avx512.mask.pcmpgt.w.256(<16 x i16> %a, <16 x i16> %b, i16 -1) + ret i16 %res +} + +define i16 @test_mask_pcmpgt_w_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) { +; CHECK-LABEL: test_mask_pcmpgt_w_256: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x65,0xc1] +; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call i16 @llvm.x86.avx512.mask.pcmpgt.w.256(<16 x i16> %a, <16 x i16> %b, i16 %mask) + ret i16 %res +} + +declare i16 @llvm.x86.avx512.mask.pcmpgt.w.256(<16 x i16>, <16 x i16>, i16) + diff --git a/test/CodeGen/X86/avx512bwvl-intrinsics.ll b/test/CodeGen/X86/avx512bwvl-intrinsics.ll index 75334580e0d..37174e73f38 100644 --- a/test/CodeGen/X86/avx512bwvl-intrinsics.ll +++ b/test/CodeGen/X86/avx512bwvl-intrinsics.ll @@ -3,98 +3,6 @@ ; 256-bit -define i32 @test_pcmpeq_b_256(<32 x i8> %a, <32 x i8> %b) { -; CHECK-LABEL: test_pcmpeq_b_256: -; CHECK: ## BB#0: -; CHECK-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1] -; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call i32 @llvm.x86.avx512.mask.pcmpeq.b.256(<32 x i8> %a, <32 x i8> %b, i32 -1) - ret i32 %res -} - -define i32 @test_mask_pcmpeq_b_256(<32 x i8> %a, <32 
x i8> %b, i32 %mask) { -; CHECK-LABEL: test_mask_pcmpeq_b_256: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] -; CHECK-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x74,0xc1] -; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call i32 @llvm.x86.avx512.mask.pcmpeq.b.256(<32 x i8> %a, <32 x i8> %b, i32 %mask) - ret i32 %res -} - -declare i32 @llvm.x86.avx512.mask.pcmpeq.b.256(<32 x i8>, <32 x i8>, i32) - -define i16 @test_pcmpeq_w_256(<16 x i16> %a, <16 x i16> %b) { -; CHECK-LABEL: test_pcmpeq_w_256: -; CHECK: ## BB#0: -; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x75,0xc1] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call i16 @llvm.x86.avx512.mask.pcmpeq.w.256(<16 x i16> %a, <16 x i16> %b, i16 -1) - ret i16 %res -} - -define i16 @test_mask_pcmpeq_w_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) { -; CHECK-LABEL: test_mask_pcmpeq_w_256: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x75,0xc1] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call i16 @llvm.x86.avx512.mask.pcmpeq.w.256(<16 x i16> %a, <16 x i16> %b, i16 %mask) - ret i16 %res -} - -declare i16 @llvm.x86.avx512.mask.pcmpeq.w.256(<16 x i16>, <16 x i16>, i16) - -define i32 @test_pcmpgt_b_256(<32 x i8> %a, <32 x i8> %b) { -; CHECK-LABEL: test_pcmpgt_b_256: -; CHECK: ## BB#0: -; CHECK-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x64,0xc1] -; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call i32 @llvm.x86.avx512.mask.pcmpgt.b.256(<32 x i8> %a, <32 x i8> %b, i32 -1) - ret i32 %res -} - -define i32 
@test_mask_pcmpgt_b_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) { -; CHECK-LABEL: test_mask_pcmpgt_b_256: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] -; CHECK-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x64,0xc1] -; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call i32 @llvm.x86.avx512.mask.pcmpgt.b.256(<32 x i8> %a, <32 x i8> %b, i32 %mask) - ret i32 %res -} - -declare i32 @llvm.x86.avx512.mask.pcmpgt.b.256(<32 x i8>, <32 x i8>, i32) - -define i16 @test_pcmpgt_w_256(<16 x i16> %a, <16 x i16> %b) { -; CHECK-LABEL: test_pcmpgt_w_256: -; CHECK: ## BB#0: -; CHECK-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x65,0xc1] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call i16 @llvm.x86.avx512.mask.pcmpgt.w.256(<16 x i16> %a, <16 x i16> %b, i16 -1) - ret i16 %res -} - -define i16 @test_mask_pcmpgt_w_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) { -; CHECK-LABEL: test_mask_pcmpgt_w_256: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x65,0xc1] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call i16 @llvm.x86.avx512.mask.pcmpgt.w.256(<16 x i16> %a, <16 x i16> %b, i16 %mask) - ret i16 %res -} - -declare i16 @llvm.x86.avx512.mask.pcmpgt.w.256(<16 x i16>, <16 x i16>, i16) - define <8 x i32> @test_cmp_b_256(<32 x i8> %a0, <32 x i8> %a1) { ; CHECK-LABEL: test_cmp_b_256: ; CHECK: ## BB#0: diff --git a/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll b/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll index 9adf6c91592..3089e598893 100644 --- a/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll +++ b/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll @@ -558,3 +558,187 @@ define <8 x 
i32>@test_int_x86_avx512_mask_pshuf_d_256(<8 x i32> %x0, i32 %x1, <8 %res4 = add <8 x i32> %res3, %res2 ret <8 x i32> %res4 } + +define i8 @test_pcmpeq_d_256(<8 x i32> %a, <8 x i32> %b) { +; CHECK-LABEL: test_pcmpeq_d_256: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x76,0xc1] +; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.256(<8 x i32> %a, <8 x i32> %b, i8 -1) + ret i8 %res +} + +define i8 @test_mask_pcmpeq_d_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) { +; CHECK-LABEL: test_mask_pcmpeq_d_256: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x76,0xc1] +; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.256(<8 x i32> %a, <8 x i32> %b, i8 %mask) + ret i8 %res +} + +declare i8 @llvm.x86.avx512.mask.pcmpeq.d.256(<8 x i32>, <8 x i32>, i8) + +define i8 @test_pcmpeq_q_256(<4 x i64> %a, <4 x i64> %b) { +; CHECK-LABEL: test_pcmpeq_q_256: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x28,0x29,0xc1] +; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.256(<4 x i64> %a, <4 x i64> %b, i8 -1) + ret i8 %res +} + +define i8 @test_mask_pcmpeq_q_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) { +; CHECK-LABEL: test_mask_pcmpeq_q_256: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x29,0xc1] +; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.256(<4 x i64> 
%a, <4 x i64> %b, i8 %mask) + ret i8 %res +} + +declare i8 @llvm.x86.avx512.mask.pcmpeq.q.256(<4 x i64>, <4 x i64>, i8) + +define i8 @test_pcmpgt_d_256(<8 x i32> %a, <8 x i32> %b) { +; CHECK-LABEL: test_pcmpgt_d_256: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x66,0xc1] +; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.256(<8 x i32> %a, <8 x i32> %b, i8 -1) + ret i8 %res +} + +define i8 @test_mask_pcmpgt_d_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) { +; CHECK-LABEL: test_mask_pcmpgt_d_256: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x66,0xc1] +; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.256(<8 x i32> %a, <8 x i32> %b, i8 %mask) + ret i8 %res +} + +declare i8 @llvm.x86.avx512.mask.pcmpgt.d.256(<8 x i32>, <8 x i32>, i8) + +define i8 @test_pcmpgt_q_256(<4 x i64> %a, <4 x i64> %b) { +; CHECK-LABEL: test_pcmpgt_q_256: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x28,0x37,0xc1] +; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.256(<4 x i64> %a, <4 x i64> %b, i8 -1) + ret i8 %res +} + +define i8 @test_mask_pcmpgt_q_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) { +; CHECK-LABEL: test_mask_pcmpgt_q_256: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x37,0xc1] +; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.256(<4 x i64> %a, <4 
x i64> %b, i8 %mask) + ret i8 %res +} + +declare i8 @llvm.x86.avx512.mask.pcmpgt.q.256(<4 x i64>, <4 x i64>, i8) + +define i8 @test_pcmpeq_d_128(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: test_pcmpeq_d_128: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x76,0xc1] +; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.128(<4 x i32> %a, <4 x i32> %b, i8 -1) + ret i8 %res +} + +define i8 @test_mask_pcmpeq_d_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) { +; CHECK-LABEL: test_mask_pcmpeq_d_128: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x76,0xc1] +; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.128(<4 x i32> %a, <4 x i32> %b, i8 %mask) + ret i8 %res +} + +declare i8 @llvm.x86.avx512.mask.pcmpeq.d.128(<4 x i32>, <4 x i32>, i8) + +define i8 @test_pcmpeq_q_128(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: test_pcmpeq_q_128: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x08,0x29,0xc1] +; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.128(<2 x i64> %a, <2 x i64> %b, i8 -1) + ret i8 %res +} + +define i8 @test_mask_pcmpeq_q_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) { +; CHECK-LABEL: test_mask_pcmpeq_q_128: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x29,0xc1] +; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.128(<2 x i64> %a, <2 x i64> 
%b, i8 %mask) + ret i8 %res +} + +declare i8 @llvm.x86.avx512.mask.pcmpeq.q.128(<2 x i64>, <2 x i64>, i8) + +define i8 @test_pcmpgt_d_128(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: test_pcmpgt_d_128: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x66,0xc1] +; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.128(<4 x i32> %a, <4 x i32> %b, i8 -1) + ret i8 %res +} + +define i8 @test_mask_pcmpgt_d_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) { +; CHECK-LABEL: test_mask_pcmpgt_d_128: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x66,0xc1] +; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.128(<4 x i32> %a, <4 x i32> %b, i8 %mask) + ret i8 %res +} + +declare i8 @llvm.x86.avx512.mask.pcmpgt.d.128(<4 x i32>, <4 x i32>, i8) + +define i8 @test_pcmpgt_q_128(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: test_pcmpgt_q_128: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x08,0x37,0xc1] +; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.128(<2 x i64> %a, <2 x i64> %b, i8 -1) + ret i8 %res +} + +define i8 @test_mask_pcmpgt_q_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) { +; CHECK-LABEL: test_mask_pcmpgt_q_128: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x37,0xc1] +; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.128(<2 x i64> %a, <2 x i64> %b, i8 
%mask) + ret i8 %res +} + +declare i8 @llvm.x86.avx512.mask.pcmpgt.q.128(<2 x i64>, <2 x i64>, i8) diff --git a/test/CodeGen/X86/avx512vl-intrinsics.ll b/test/CodeGen/X86/avx512vl-intrinsics.ll index 4737c1f0a1d..de7f2888aba 100644 --- a/test/CodeGen/X86/avx512vl-intrinsics.ll +++ b/test/CodeGen/X86/avx512vl-intrinsics.ll @@ -1,100 +1,9 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; NOTE: Assertions have been autogenerated by update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl --show-mc-encoding| FileCheck %s ; 256-bit -define i8 @test_pcmpeq_d_256(<8 x i32> %a, <8 x i32> %b) { -; CHECK-LABEL: test_pcmpeq_d_256: -; CHECK: ## BB#0: -; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x76,0xc1] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.256(<8 x i32> %a, <8 x i32> %b, i8 -1) - ret i8 %res -} - -define i8 @test_mask_pcmpeq_d_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) { -; CHECK-LABEL: test_mask_pcmpeq_d_256: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x76,0xc1] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.256(<8 x i32> %a, <8 x i32> %b, i8 %mask) - ret i8 %res -} - -declare i8 @llvm.x86.avx512.mask.pcmpeq.d.256(<8 x i32>, <8 x i32>, i8) - -define i8 @test_pcmpeq_q_256(<4 x i64> %a, <4 x i64> %b) { -; CHECK-LABEL: test_pcmpeq_q_256: -; CHECK: ## BB#0: -; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x28,0x29,0xc1] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.256(<4 x i64> %a, <4 x i64> %b, i8 -1) - 
ret i8 %res -} - -define i8 @test_mask_pcmpeq_q_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) { -; CHECK-LABEL: test_mask_pcmpeq_q_256: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x29,0xc1] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.256(<4 x i64> %a, <4 x i64> %b, i8 %mask) - ret i8 %res -} - -declare i8 @llvm.x86.avx512.mask.pcmpeq.q.256(<4 x i64>, <4 x i64>, i8) - -define i8 @test_pcmpgt_d_256(<8 x i32> %a, <8 x i32> %b) { -; CHECK-LABEL: test_pcmpgt_d_256: -; CHECK: ## BB#0: -; CHECK-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x66,0xc1] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.256(<8 x i32> %a, <8 x i32> %b, i8 -1) - ret i8 %res -} - -define i8 @test_mask_pcmpgt_d_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) { -; CHECK-LABEL: test_mask_pcmpgt_d_256: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x66,0xc1] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.256(<8 x i32> %a, <8 x i32> %b, i8 %mask) - ret i8 %res -} - -declare i8 @llvm.x86.avx512.mask.pcmpgt.d.256(<8 x i32>, <8 x i32>, i8) - -define i8 @test_pcmpgt_q_256(<4 x i64> %a, <4 x i64> %b) { -; CHECK-LABEL: test_pcmpgt_q_256: -; CHECK: ## BB#0: -; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x28,0x37,0xc1] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.256(<4 x i64> %a, <4 x i64> %b, i8 -1) - ret i8 
%res -} - -define i8 @test_mask_pcmpgt_q_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) { -; CHECK-LABEL: test_mask_pcmpgt_q_256: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x37,0xc1] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.256(<4 x i64> %a, <4 x i64> %b, i8 %mask) - ret i8 %res -} - -declare i8 @llvm.x86.avx512.mask.pcmpgt.q.256(<4 x i64>, <4 x i64>, i8) - define <8 x i8> @test_cmp_d_256(<8 x i32> %a0, <8 x i32> %a1) { ; CHECK-LABEL: test_cmp_d_256: ; CHECK: ## BB#0: @@ -549,98 +458,6 @@ declare i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64>, <4 x i64>, i32, i8) nounw ; 128-bit -define i8 @test_pcmpeq_d_128(<4 x i32> %a, <4 x i32> %b) { -; CHECK-LABEL: test_pcmpeq_d_128: -; CHECK: ## BB#0: -; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x76,0xc1] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.128(<4 x i32> %a, <4 x i32> %b, i8 -1) - ret i8 %res -} - -define i8 @test_mask_pcmpeq_d_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) { -; CHECK-LABEL: test_mask_pcmpeq_d_128: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x76,0xc1] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.128(<4 x i32> %a, <4 x i32> %b, i8 %mask) - ret i8 %res -} - -declare i8 @llvm.x86.avx512.mask.pcmpeq.d.128(<4 x i32>, <4 x i32>, i8) - -define i8 @test_pcmpeq_q_128(<2 x i64> %a, <2 x i64> %b) { -; CHECK-LABEL: test_pcmpeq_q_128: -; CHECK: ## BB#0: -; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 ## encoding: 
[0x62,0xf2,0xfd,0x08,0x29,0xc1] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.128(<2 x i64> %a, <2 x i64> %b, i8 -1) - ret i8 %res -} - -define i8 @test_mask_pcmpeq_q_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) { -; CHECK-LABEL: test_mask_pcmpeq_q_128: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x29,0xc1] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.128(<2 x i64> %a, <2 x i64> %b, i8 %mask) - ret i8 %res -} - -declare i8 @llvm.x86.avx512.mask.pcmpeq.q.128(<2 x i64>, <2 x i64>, i8) - -define i8 @test_pcmpgt_d_128(<4 x i32> %a, <4 x i32> %b) { -; CHECK-LABEL: test_pcmpgt_d_128: -; CHECK: ## BB#0: -; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x66,0xc1] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.128(<4 x i32> %a, <4 x i32> %b, i8 -1) - ret i8 %res -} - -define i8 @test_mask_pcmpgt_d_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) { -; CHECK-LABEL: test_mask_pcmpgt_d_128: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x66,0xc1] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.128(<4 x i32> %a, <4 x i32> %b, i8 %mask) - ret i8 %res -} - -declare i8 @llvm.x86.avx512.mask.pcmpgt.d.128(<4 x i32>, <4 x i32>, i8) - -define i8 @test_pcmpgt_q_128(<2 x i64> %a, <2 x i64> %b) { -; CHECK-LABEL: test_pcmpgt_q_128: -; CHECK: ## BB#0: -; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 ## encoding: 
[0x62,0xf2,0xfd,0x08,0x37,0xc1] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.128(<2 x i64> %a, <2 x i64> %b, i8 -1) - ret i8 %res -} - -define i8 @test_mask_pcmpgt_q_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) { -; CHECK-LABEL: test_mask_pcmpgt_q_128: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x37,0xc1] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.128(<2 x i64> %a, <2 x i64> %b, i8 %mask) - ret i8 %res -} - -declare i8 @llvm.x86.avx512.mask.pcmpgt.q.128(<2 x i64>, <2 x i64>, i8) - define <8 x i8> @test_cmp_d_128(<4 x i32> %a0, <4 x i32> %a1) { ; CHECK-LABEL: test_cmp_d_128: ; CHECK: ## BB#0: @@ -7955,9 +7772,9 @@ define <8 x i32>@test_int_x86_avx512_mask_psrav8_si_const() { ; CHECK: ## BB#0: ; CHECK-NEXT: vmovdqa32 {{.*#+}} ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51] ; CHECK-NEXT: ## encoding: [0x62,0xf1,0x7d,0x28,0x6f,0x05,A,A,A,A] -; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI510_0-4, kind: reloc_riprel_4byte +; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI494_0-4, kind: reloc_riprel_4byte ; CHECK-NEXT: vpsravd {{.*}}(%rip), %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x46,0x05,A,A,A,A] -; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI510_1-4, kind: reloc_riprel_4byte +; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI494_1-4, kind: reloc_riprel_4byte ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx512.mask.psrav8.si(<8 x i32> , <8 x i32> , <8 x i32> zeroinitializer, i8 -1) ret <8 x i32> %res @@ -7988,9 +7805,9 @@ define <2 x i64>@test_int_x86_avx512_mask_psrav_q_128_const(i8 %x3) { ; CHECK: ## BB#0: ; CHECK-NEXT: vmovdqa64 {{.*#+}} xmm0 = [2,18446744073709551607] ; CHECK-NEXT: ## 
encoding: [0x62,0xf1,0xfd,0x08,0x6f,0x05,A,A,A,A] -; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI512_0-4, kind: reloc_riprel_4byte +; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI496_0-4, kind: reloc_riprel_4byte ; CHECK-NEXT: vpsravq {{.*}}(%rip), %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x46,0x05,A,A,A,A] -; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI512_1-4, kind: reloc_riprel_4byte +; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI496_1-4, kind: reloc_riprel_4byte ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x i64> @llvm.x86.avx512.mask.psrav.q.128(<2 x i64> , <2 x i64> , <2 x i64> zeroinitializer, i8 -1) ret <2 x i64> %res -- 2.50.1