From: Craig Topper Date: Sat, 26 Jan 2019 06:27:01 +0000 (+0000) Subject: [X86] Remove and autoupgrade vpconflict intrinsics that take a mask and passthru... X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=a0486c613397bdd4ae00feab5ecc549db0987c94;p=llvm [X86] Remove and autoupgrade vpconflict intrinsics that take a mask and passthru argument. We have unmasked versions as of r352172 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@352270 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td index 642a928db56..5f40a861ea9 100644 --- a/include/llvm/IR/IntrinsicsX86.td +++ b/include/llvm/IR/IntrinsicsX86.td @@ -4014,32 +4014,6 @@ let TargetPrefix = "x86" in { def int_x86_avx512_conflict_q_512 : GCCBuiltin<"__builtin_ia32_vpconflictdi_512">, Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty], [IntrNoMem]>; - - def int_x86_avx512_mask_conflict_d_128 : // FIXME: remove - Intrinsic<[llvm_v4i32_ty], - [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_conflict_d_256 : // FIXME: remove - Intrinsic<[llvm_v8i32_ty], - [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_conflict_d_512 : // FIXME: remove - Intrinsic<[llvm_v16i32_ty], - [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], - [IntrNoMem]>; - - def int_x86_avx512_mask_conflict_q_128 : // FIXME: remove - Intrinsic<[llvm_v2i64_ty], - [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_conflict_q_256 : // FIXME: remove - Intrinsic<[llvm_v4i64_ty], - [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_conflict_q_512 : // FIXME: remove - Intrinsic<[llvm_v8i64_ty], - [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], - [IntrNoMem]>; } // Compares diff --git a/lib/IR/AutoUpgrade.cpp b/lib/IR/AutoUpgrade.cpp index d8949661e96..fe1a4ffd0fa 100644 --- a/lib/IR/AutoUpgrade.cpp +++ b/lib/IR/AutoUpgrade.cpp @@ -299,6 +299,7 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) { Name.startswith("avx512.mask.fpclass.p") || // Added in 7.0 Name.startswith("avx512.mask.vpshufbitqmb.") || // Added in 8.0 Name.startswith("avx512.mask.pmultishift.qb.") || // Added in 8.0 + Name.startswith("avx512.mask.conflict.") || // Added in 9.0 Name == "avx512.mask.pmov.qd.256" || // Added in 9.0 Name == "avx512.mask.pmov.qd.512" || // Added in 9.0 Name == "avx512.mask.pmov.wb.256" || // Added in 9.0 @@ -1503,6 +1504,21 @@ static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder, IID = Intrinsic::x86_avx512_pmultishift_qb_512; else llvm_unreachable("Unexpected intrinsic"); + } else if (Name.startswith("conflict.")) { + if (Name[9] == 'd' && VecWidth == 128) + IID = Intrinsic::x86_avx512_conflict_d_128; + else if (Name[9] == 'd' && VecWidth == 256) + IID = Intrinsic::x86_avx512_conflict_d_256; + else if (Name[9] == 'd' && VecWidth == 512) + IID = Intrinsic::x86_avx512_conflict_d_512; + else if (Name[9] == 'q' && VecWidth == 128) + IID = Intrinsic::x86_avx512_conflict_q_128; + else if (Name[9] == 'q' && VecWidth == 256) + IID = Intrinsic::x86_avx512_conflict_q_256; + else if (Name[9] == 'q' && VecWidth == 512) + IID = Intrinsic::x86_avx512_conflict_q_512; + else + llvm_unreachable("Unexpected intrinsic"); } else return false; diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h index 65e73e8497e..5e83090702c 100644 --- a/lib/Target/X86/X86IntrinsicsInfo.h +++ b/lib/Target/X86/X86IntrinsicsInfo.h @@ 
-501,18 +501,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86ISD::COMPRESS, 0), X86_INTRINSIC_DATA(avx512_mask_compress_w_512, COMPRESS_EXPAND_IN_REG, X86ISD::COMPRESS, 0), - X86_INTRINSIC_DATA(avx512_mask_conflict_d_128, INTR_TYPE_1OP_MASK, - X86ISD::CONFLICT, 0), - X86_INTRINSIC_DATA(avx512_mask_conflict_d_256, INTR_TYPE_1OP_MASK, - X86ISD::CONFLICT, 0), - X86_INTRINSIC_DATA(avx512_mask_conflict_d_512, INTR_TYPE_1OP_MASK, - X86ISD::CONFLICT, 0), - X86_INTRINSIC_DATA(avx512_mask_conflict_q_128, INTR_TYPE_1OP_MASK, - X86ISD::CONFLICT, 0), - X86_INTRINSIC_DATA(avx512_mask_conflict_q_256, INTR_TYPE_1OP_MASK, - X86ISD::CONFLICT, 0), - X86_INTRINSIC_DATA(avx512_mask_conflict_q_512, INTR_TYPE_1OP_MASK, - X86ISD::CONFLICT, 0), X86_INTRINSIC_DATA(avx512_mask_cvtpd2dq_128, CVTPD2DQ_MASK, X86ISD::CVTP2SI, X86ISD::MCVTP2SI), X86_INTRINSIC_DATA(avx512_mask_cvtpd2dq_512, INTR_TYPE_1OP_MASK, diff --git a/test/CodeGen/X86/avx512cd-intrinsics-upgrade.ll b/test/CodeGen/X86/avx512cd-intrinsics-upgrade.ll index e5164eea3c8..5e076c1f38c 100644 --- a/test/CodeGen/X86/avx512cd-intrinsics-upgrade.ll +++ b/test/CodeGen/X86/avx512cd-intrinsics-upgrade.ll @@ -2,6 +2,98 @@ ; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx512cd | FileCheck %s --check-prefixes=CHECK,X86 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512cd | FileCheck %s --check-prefixes=CHECK,X64 +declare <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly + +define <16 x i32> @test_conflict_d(<16 x i32> %a) { +; CHECK-LABEL: test_conflict_d: +; CHECK: # %bb.0: +; CHECK-NEXT: vpconflictd %zmm0, %zmm0 +; CHECK-NEXT: ret{{[l|q]}} + %res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> undef, i16 -1) + ret <16 x i32> %res +} + +define <16 x i32> @test_mask_conflict_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) { +; X86-LABEL: test_mask_conflict_d: +; X86: # %bb.0: +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 +; X86-NEXT: vpconflictd %zmm0, %zmm1 {%k1} +; X86-NEXT: vmovdqa64 %zmm1, %zmm0 +; X86-NEXT: retl +; +; X64-LABEL: test_mask_conflict_d: +; X64: # %bb.0: +; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: vpconflictd %zmm0, %zmm1 {%k1} +; X64-NEXT: vmovdqa64 %zmm1, %zmm0 +; X64-NEXT: retq + %res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask) + ret <16 x i32> %res +} + +define <16 x i32> @test_maskz_conflict_d(<16 x i32> %a, i16 %mask) { +; X86-LABEL: test_maskz_conflict_d: +; X86: # %bb.0: +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 +; X86-NEXT: vpconflictd %zmm0, %zmm0 {%k1} {z} +; X86-NEXT: retl +; +; X64-LABEL: test_maskz_conflict_d: +; X64: # %bb.0: +; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: vpconflictd %zmm0, %zmm0 {%k1} {z} +; X64-NEXT: retq + %res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 %mask) + ret <16 x i32> %res +} + +declare <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly + +define <8 x i64> @test_conflict_q(<8 x i64> %a) { +; CHECK-LABEL: test_conflict_q: +; CHECK: # %bb.0: +; CHECK-NEXT: vpconflictq %zmm0, %zmm0 +; CHECK-NEXT: ret{{[l|q]}} + %res = call <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64> %a, <8 x i64> undef, i8 -1) + ret <8 x i64> %res +} + +define <8 x i64> @test_mask_conflict_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) { +; X86-LABEL: test_mask_conflict_q: +; X86: # %bb.0: +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: kmovw %eax, %k1 +; X86-NEXT: vpconflictq %zmm0, %zmm1 {%k1} +; 
X86-NEXT: vmovdqa64 %zmm1, %zmm0 +; X86-NEXT: retl +; +; X64-LABEL: test_mask_conflict_q: +; X64: # %bb.0: +; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: vpconflictq %zmm0, %zmm1 {%k1} +; X64-NEXT: vmovdqa64 %zmm1, %zmm0 +; X64-NEXT: retq + %res = call <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask) + ret <8 x i64> %res +} + +define <8 x i64> @test_maskz_conflict_q(<8 x i64> %a, i8 %mask) { +; X86-LABEL: test_maskz_conflict_q: +; X86: # %bb.0: +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: kmovw %eax, %k1 +; X86-NEXT: vpconflictq %zmm0, %zmm0 {%k1} {z} +; X86-NEXT: retl +; +; X64-LABEL: test_maskz_conflict_q: +; X64: # %bb.0: +; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: vpconflictq %zmm0, %zmm0 {%k1} {z} +; X64-NEXT: retq + %res = call <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64> %a, <8 x i64> zeroinitializer, i8 %mask) + ret <8 x i64> %res +} + define <16 x i32> @test_lzcnt_d(<16 x i32> %a) { ; CHECK-LABEL: test_lzcnt_d: ; CHECK: # %bb.0: diff --git a/test/CodeGen/X86/avx512cd-intrinsics.ll b/test/CodeGen/X86/avx512cd-intrinsics.ll index fd18951747a..ea248f47f8d 100644 --- a/test/CodeGen/X86/avx512cd-intrinsics.ll +++ b/test/CodeGen/X86/avx512cd-intrinsics.ll @@ -2,18 +2,34 @@ ; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx512cd | FileCheck %s --check-prefixes=CHECK,X86 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512cd | FileCheck %s --check-prefixes=CHECK,X64 -declare <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly - -define <8 x i64> @test_conflict_q(<8 x i64> %a) { -; CHECK-LABEL: test_conflict_q: +define <16 x i32> @test_conflict_d(<16 x i32> %a) { +; CHECK-LABEL: test_conflict_d: ; CHECK: # %bb.0: -; CHECK-NEXT: vpconflictq %zmm0, %zmm0 +; CHECK-NEXT: vpconflictd %zmm0, %zmm0 ; CHECK-NEXT: ret{{[l|q]}} - %res = call <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64> %a, <8 x i64> zeroinitializer, i8 -1) - ret <8 x i64> %res + %1 = call <16 x i32> @llvm.x86.avx512.conflict.d.512(<16 x i32> %a) + ret <16 x i32> %1 } -declare <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly +define <16 x i32> @test_mask_conflict_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) { +; X86-LABEL: test_mask_conflict_d: +; X86: # %bb.0: +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 +; X86-NEXT: vpconflictd %zmm0, %zmm1 {%k1} +; X86-NEXT: vmovdqa64 %zmm1, %zmm0 +; X86-NEXT: retl +; +; X64-LABEL: test_mask_conflict_d: +; X64: # %bb.0: +; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: vpconflictd %zmm0, %zmm1 {%k1} +; X64-NEXT: vmovdqa64 %zmm1, %zmm0 +; X64-NEXT: retq + %1 = call <16 x i32> @llvm.x86.avx512.conflict.d.512(<16 x i32> %a) + %2 = bitcast i16 %mask to <16 x i1> + %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %b + ret <16 x i32> %3 +} define <16 x i32> @test_maskz_conflict_d(<16 x i32> %a, i16 %mask) { ; X86-LABEL: test_maskz_conflict_d: @@ -27,8 +43,19 @@ define <16 x i32> @test_maskz_conflict_d(<16 x i32> %a, i16 %mask) { ; X64-NEXT: kmovw %edi, %k1 ; X64-NEXT: vpconflictd %zmm0, %zmm0 {%k1} {z} ; X64-NEXT: retq - %res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 %mask) - ret <16 x i32> %res + %1 = call <16 x i32> @llvm.x86.avx512.conflict.d.512(<16 x i32> %a) + %2 = bitcast i16 %mask to <16 x i1> + %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer + ret <16 x i32> %3 +} + +define <8 x i64> @test_conflict_q(<8 x i64> %a) { +; CHECK-LABEL: test_conflict_q: +; CHECK: # %bb.0: +; 
CHECK-NEXT: vpconflictq %zmm0, %zmm0 +; CHECK-NEXT: ret{{[l|q]}} + %1 = call <8 x i64> @llvm.x86.avx512.conflict.q.512(<8 x i64> %a) + ret <8 x i64> %1 } define <8 x i64> @test_mask_conflict_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) { @@ -46,8 +73,29 @@ define <8 x i64> @test_mask_conflict_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) { ; X64-NEXT: vpconflictq %zmm0, %zmm1 {%k1} ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ; X64-NEXT: retq - %res = call <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask) - ret <8 x i64> %res + %1 = call <8 x i64> @llvm.x86.avx512.conflict.q.512(<8 x i64> %a) + %2 = bitcast i8 %mask to <8 x i1> + %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %b + ret <8 x i64> %3 +} + +define <8 x i64> @test_maskz_conflict_q(<8 x i64> %a, i8 %mask) { +; X86-LABEL: test_maskz_conflict_q: +; X86: # %bb.0: +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: kmovw %eax, %k1 +; X86-NEXT: vpconflictq %zmm0, %zmm0 {%k1} {z} +; X86-NEXT: retl +; +; X64-LABEL: test_maskz_conflict_q: +; X64: # %bb.0: +; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: vpconflictq %zmm0, %zmm0 {%k1} {z} +; X64-NEXT: retq + %1 = call <8 x i64> @llvm.x86.avx512.conflict.q.512(<8 x i64> %a) + %2 = bitcast i8 %mask to <8 x i1> + %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> zeroinitializer + ret <8 x i64> %3 } define <16 x i32> @test_lzcnt_d(<16 x i32> %a) { @@ -110,3 +158,6 @@ define <8 x i64> @test_mask_lzcnt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) { %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %b ret <8 x i64> %3 } + +declare <16 x i32> @llvm.x86.avx512.conflict.d.512(<16 x i32>) +declare <8 x i64> @llvm.x86.avx512.conflict.q.512(<8 x i64>) diff --git a/test/CodeGen/X86/avx512cdvl-intrinsics-upgrade.ll b/test/CodeGen/X86/avx512cdvl-intrinsics-upgrade.ll index e8c8fc700a9..6f4fc11495f 100644 --- a/test/CodeGen/X86/avx512cdvl-intrinsics-upgrade.ll +++ b/test/CodeGen/X86/avx512cdvl-intrinsics-upgrade.ll @@ -180,3 +180,127 @@ define <2 x i64> @test_x86_broadcastmb_128(i8 %a0) { } declare <2 x i64> @llvm.x86.avx512.broadcastmb.128(i8) +declare <4 x i32> @llvm.x86.avx512.mask.conflict.d.128(<4 x i32>, <4 x i32>, i8) + +define <4 x i32> @test_int_x86_avx512_mask_vpconflict_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) { +; X86-LABEL: test_int_x86_avx512_mask_vpconflict_d_128: +; X86: # %bb.0: +; X86-NEXT: vpconflictd %xmm0, %xmm2 +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: kmovw %eax, %k1 +; X86-NEXT: vpconflictd %xmm0, %xmm1 {%k1} +; X86-NEXT: vpconflictd %xmm0, %xmm0 {%k1} {z} +; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 +; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 +; X86-NEXT: retl +; +; X64-LABEL: test_int_x86_avx512_mask_vpconflict_d_128: +; X64: # %bb.0: +; X64-NEXT: vpconflictd %xmm0, %xmm2 +; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: vpconflictd %xmm0, %xmm1 {%k1} +; X64-NEXT: vpconflictd %xmm0, %xmm0 {%k1} {z} +; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 +; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 +; X64-NEXT: retq + %res = call <4 x i32> @llvm.x86.avx512.mask.conflict.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) + %res1 = call <4 x i32> @llvm.x86.avx512.mask.conflict.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1) + %res3 = call <4 x i32> @llvm.x86.avx512.mask.conflict.d.128(<4 x i32> %x0, <4 x i32> zeroinitializer, i8 %x2) + %res2 = add <4 x i32> %res, %res1 + %res4 = add <4 x i32> %res2, %res3 + ret <4 x i32> %res4 +} + +declare <8 x i32> @llvm.x86.avx512.mask.conflict.d.256(<8 x i32>, <8 x i32>, i8) + +define <8 x i32> @test_int_x86_avx512_mask_vpconflict_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 
%x2) { +; X86-LABEL: test_int_x86_avx512_mask_vpconflict_d_256: +; X86: # %bb.0: +; X86-NEXT: vpconflictd %ymm0, %ymm2 +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: kmovw %eax, %k1 +; X86-NEXT: vpconflictd %ymm0, %ymm1 {%k1} +; X86-NEXT: vpconflictd %ymm0, %ymm0 {%k1} {z} +; X86-NEXT: vpaddd %ymm0, %ymm2, %ymm0 +; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 +; X86-NEXT: retl +; +; X64-LABEL: test_int_x86_avx512_mask_vpconflict_d_256: +; X64: # %bb.0: +; X64-NEXT: vpconflictd %ymm0, %ymm2 +; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: vpconflictd %ymm0, %ymm1 {%k1} +; X64-NEXT: vpconflictd %ymm0, %ymm0 {%k1} {z} +; X64-NEXT: vpaddd %ymm0, %ymm2, %ymm0 +; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0 +; X64-NEXT: retq + %res = call <8 x i32> @llvm.x86.avx512.mask.conflict.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) + %res1 = call <8 x i32> @llvm.x86.avx512.mask.conflict.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 -1) + %res2 = call <8 x i32> @llvm.x86.avx512.mask.conflict.d.256(<8 x i32> %x0, <8 x i32> zeroinitializer, i8 %x2) + %res3 = add <8 x i32> %res, %res1 + %res4 = add <8 x i32> %res2, %res3 + ret <8 x i32> %res4 +} + +declare <2 x i64> @llvm.x86.avx512.mask.conflict.q.128(<2 x i64>, <2 x i64>, i8) + +define <2 x i64> @test_int_x86_avx512_mask_vpconflict_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) { +; X86-LABEL: test_int_x86_avx512_mask_vpconflict_q_128: +; X86: # %bb.0: +; X86-NEXT: vpconflictq %xmm0, %xmm2 +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: kmovw %eax, %k1 +; X86-NEXT: vpconflictq %xmm0, %xmm1 {%k1} +; X86-NEXT: vpconflictq %xmm0, %xmm0 {%k1} {z} +; X86-NEXT: vpaddq %xmm0, %xmm2, %xmm0 +; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0 +; X86-NEXT: retl +; +; X64-LABEL: test_int_x86_avx512_mask_vpconflict_q_128: +; X64: # %bb.0: +; X64-NEXT: vpconflictq %xmm0, %xmm2 +; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: vpconflictq %xmm0, %xmm1 {%k1} +; X64-NEXT: vpconflictq %xmm0, %xmm0 {%k1} {z} +; X64-NEXT: vpaddq %xmm0, %xmm2, %xmm0 +; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0 +; X64-NEXT: retq + %res = call <2 x i64> @llvm.x86.avx512.mask.conflict.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) + %res1 = call <2 x i64> @llvm.x86.avx512.mask.conflict.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 -1) + %res2 = call <2 x i64> @llvm.x86.avx512.mask.conflict.q.128(<2 x i64> %x0, <2 x i64> zeroinitializer, i8 %x2) + %res3 = add <2 x i64> %res, %res1 + %res4 = add <2 x i64> %res2, %res3 + ret <2 x i64> %res4 +} + +declare <4 x i64> @llvm.x86.avx512.mask.conflict.q.256(<4 x i64>, <4 x i64>, i8) + +define <4 x i64> @test_int_x86_avx512_mask_vpconflict_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) { +; X86-LABEL: test_int_x86_avx512_mask_vpconflict_q_256: +; X86: # %bb.0: +; X86-NEXT: vpconflictq %ymm0, %ymm2 +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: kmovw %eax, %k1 +; X86-NEXT: vpconflictq %ymm0, %ymm1 {%k1} +; X86-NEXT: vpconflictq %ymm0, %ymm0 {%k1} {z} +; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 +; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 +; X86-NEXT: retl +; +; X64-LABEL: test_int_x86_avx512_mask_vpconflict_q_256: +; X64: # %bb.0: +; X64-NEXT: vpconflictq %ymm0, %ymm2 +; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: vpconflictq %ymm0, %ymm1 {%k1} +; X64-NEXT: vpconflictq %ymm0, %ymm0 {%k1} {z} +; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 +; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 +; X64-NEXT: retq + %res = call <4 x i64> @llvm.x86.avx512.mask.conflict.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) + %res1 = call <4 x i64> @llvm.x86.avx512.mask.conflict.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 -1) + %res2 = call <4 x i64> 
@llvm.x86.avx512.mask.conflict.q.256(<4 x i64> %x0, <4 x i64> zeroinitializer, i8 %x2)
+ %res3 = add <4 x i64> %res, %res1
+ %res4 = add <4 x i64> %res2, %res3
+ ret <4 x i64> %res4
+}
+
diff --git a/test/CodeGen/X86/avx512cdvl-intrinsics.ll b/test/CodeGen/X86/avx512cdvl-intrinsics.ll
index cd811e584ee..4d7e19c38a2 100644
--- a/test/CodeGen/X86/avx512cdvl-intrinsics.ll
+++ b/test/CodeGen/X86/avx512cdvl-intrinsics.ll
@@ -118,109 +118,145 @@ define <4 x i64> @test_int_x86_avx512_mask_vplzcnt_q_256(<4 x i64> %x0, <4 x i64
 }
 declare <4 x i64> @llvm.ctlz.v4i64(<4 x i64>, i1) #0

-declare <4 x i32> @llvm.x86.avx512.mask.conflict.d.128(<4 x i32>, <4 x i32>, i8)
-
-define <4 x i32>@test_int_x86_avx512_mask_vpconflict_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) {
+define <4 x i32> @test_int_x86_avx512_mask_vpconflict_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) {
 ; X86-LABEL: test_int_x86_avx512_mask_vpconflict_d_128:
 ; X86: # %bb.0:
+; X86-NEXT: vpconflictd %xmm0, %xmm2
 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: kmovw %eax, %k1
 ; X86-NEXT: vpconflictd %xmm0, %xmm1 {%k1}
-; X86-NEXT: vpconflictd %xmm0, %xmm2 {%k1} {z}
-; X86-NEXT: vpconflictd %xmm0, %xmm0
-; X86-NEXT: vpaddd %xmm2, %xmm0, %xmm0
+; X86-NEXT: vpconflictd %xmm0, %xmm0 {%k1} {z}
+; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0
 ; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: test_int_x86_avx512_mask_vpconflict_d_128:
 ; X64: # %bb.0:
+; X64-NEXT: vpconflictd %xmm0, %xmm2
 ; X64-NEXT: kmovw %edi, %k1
-; X64-NEXT: vpconflictd %xmm0, %xmm2 {%k1} {z}
 ; X64-NEXT: vpconflictd %xmm0, %xmm1 {%k1}
-; X64-NEXT: vpconflictd %xmm0, %xmm0
-; X64-NEXT: vpaddd %xmm2, %xmm0, %xmm0
+; X64-NEXT: vpconflictd %xmm0, %xmm0 {%k1} {z}
+; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0
 ; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0
 ; X64-NEXT: retq
- %res = call <4 x i32> @llvm.x86.avx512.mask.conflict.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2)
- %res1 = call <4 x i32> @llvm.x86.avx512.mask.conflict.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1)
- %res3 = call <4 x i32> @llvm.x86.avx512.mask.conflict.d.128(<4 x i32> %x0, <4 x i32> zeroinitializer, i8 %x2)
- %res2 = add <4 x i32> %res, %res1
- %res4 = add <4 x i32> %res2, %res3
+ %1 = call <4 x i32> @llvm.x86.avx512.conflict.d.128(<4 x i32> %x0)
+ %2 = bitcast i8 %x2 to <8 x i1>
+ %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %3 = select <4 x i1> %extract1, <4 x i32> %1, <4 x i32> %x1
+ %4 = call <4 x i32> @llvm.x86.avx512.conflict.d.128(<4 x i32> %x0)
+ %5 = call <4 x i32> @llvm.x86.avx512.conflict.d.128(<4 x i32> %x0)
+ %6 = bitcast i8 %x2 to <8 x i1>
+ %extract = shufflevector <8 x i1> %6, <8 x i1> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %7 = select <4 x i1> %extract, <4 x i32> %5, <4 x i32> zeroinitializer
+ %res2 = add <4 x i32> %3, %4
+ %res4 = add <4 x i32> %res2, %7
 ret <4 x i32> %res4
 }

-declare <8 x i32> @llvm.x86.avx512.mask.conflict.d.256(<8 x i32>, <8 x i32>, i8)
-
-define <8 x i32>@test_int_x86_avx512_mask_vpconflict_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) {
+define <8 x i32> @test_int_x86_avx512_mask_vpconflict_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) {
 ; X86-LABEL: test_int_x86_avx512_mask_vpconflict_d_256:
 ; X86: # %bb.0:
+; X86-NEXT: vpconflictd %ymm0, %ymm2
 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: kmovw %eax, %k1
 ; X86-NEXT: vpconflictd %ymm0, %ymm1 {%k1}
-; X86-NEXT: vpconflictd %ymm0, %ymm0
+; X86-NEXT: vpconflictd %ymm0, %ymm0 {%k1} {z}
+; X86-NEXT: vpaddd %ymm0, %ymm2, %ymm0
 ; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: test_int_x86_avx512_mask_vpconflict_d_256:
 ; X64: # %bb.0:
+; X64-NEXT: vpconflictd %ymm0, %ymm2
 ; X64-NEXT: kmovw %edi, %k1
 ; X64-NEXT: vpconflictd %ymm0, %ymm1 {%k1}
-; X64-NEXT: vpconflictd %ymm0, %ymm0
+; X64-NEXT: vpconflictd %ymm0, %ymm0 {%k1} {z}
+; X64-NEXT: vpaddd %ymm0, %ymm2, %ymm0
 ; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0
 ; X64-NEXT: retq
- %res = call <8 x i32> @llvm.x86.avx512.mask.conflict.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2)
- %res1 = call <8 x i32> @llvm.x86.avx512.mask.conflict.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 -1)
- %res2 = add <8 x i32> %res, %res1
- ret <8 x i32> %res2
+ %1 = call <8 x i32> @llvm.x86.avx512.conflict.d.256(<8 x i32> %x0)
+ %2 = bitcast i8 %x2 to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x1
+ %4 = call <8 x i32> @llvm.x86.avx512.conflict.d.256(<8 x i32> %x0)
+ %5 = call <8 x i32> @llvm.x86.avx512.conflict.d.256(<8 x i32> %x0)
+ %6 = bitcast i8 %x2 to <8 x i1>
+ %7 = select <8 x i1> %6, <8 x i32> %5, <8 x i32> zeroinitializer
+ %res3 = add <8 x i32> %3, %4
+ %res4 = add <8 x i32> %7, %res3
+ ret <8 x i32> %res4
 }

-declare <2 x i64> @llvm.x86.avx512.mask.conflict.q.128(<2 x i64>, <2 x i64>, i8)
-
-define <2 x i64>@test_int_x86_avx512_mask_vpconflict_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) {
+define <2 x i64> @test_int_x86_avx512_mask_vpconflict_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) {
 ; X86-LABEL: test_int_x86_avx512_mask_vpconflict_q_128:
 ; X86: # %bb.0:
+; X86-NEXT: vpconflictq %xmm0, %xmm2
 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: kmovw %eax, %k1
 ; X86-NEXT: vpconflictq %xmm0, %xmm1 {%k1}
-; X86-NEXT: vpconflictq %xmm0, %xmm0
+; X86-NEXT: vpconflictq %xmm0, %xmm0 {%k1} {z}
+; X86-NEXT: vpaddq %xmm0, %xmm2, %xmm0
 ; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: test_int_x86_avx512_mask_vpconflict_q_128:
 ; X64: # %bb.0:
+; X64-NEXT: vpconflictq %xmm0, %xmm2
 ; X64-NEXT: kmovw %edi, %k1
 ; X64-NEXT: vpconflictq %xmm0, %xmm1 {%k1}
-; X64-NEXT: vpconflictq %xmm0, %xmm0
+; X64-NEXT: vpconflictq %xmm0, %xmm0 {%k1} {z}
+; X64-NEXT: vpaddq %xmm0, %xmm2, %xmm0
 ; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0
 ; X64-NEXT: retq
- %res = call <2 x i64> @llvm.x86.avx512.mask.conflict.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2)
- %res1 = call <2 x i64> @llvm.x86.avx512.mask.conflict.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 -1)
- %res2 = add <2 x i64> %res, %res1
- ret <2 x i64> %res2
+ %1 = call <2 x i64> @llvm.x86.avx512.conflict.q.128(<2 x i64> %x0)
+ %2 = bitcast i8 %x2 to <8 x i1>
+ %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
+ %3 = select <2 x i1> %extract1, <2 x i64> %1, <2 x i64> %x1
+ %4 = call <2 x i64> @llvm.x86.avx512.conflict.q.128(<2 x i64> %x0)
+ %5 = call <2 x i64> @llvm.x86.avx512.conflict.q.128(<2 x i64> %x0)
+ %6 = bitcast i8 %x2 to <8 x i1>
+ %extract = shufflevector <8 x i1> %6, <8 x i1> %6, <2 x i32> <i32 0, i32 1>
+ %7 = select <2 x i1> %extract, <2 x i64> %5, <2 x i64> zeroinitializer
+ %res3 = add <2 x i64> %3, %4
+ %res4 = add <2 x i64> %7, %res3
+ ret <2 x i64> %res4
 }

-declare <4 x i64> @llvm.x86.avx512.mask.conflict.q.256(<4 x i64>, <4 x i64>, i8)
-
-define <4 x i64>@test_int_x86_avx512_mask_vpconflict_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) {
+define <4 x i64> @test_int_x86_avx512_mask_vpconflict_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) {
 ; X86-LABEL: test_int_x86_avx512_mask_vpconflict_q_256:
 ; X86: # %bb.0:
+; X86-NEXT: vpconflictq %ymm0, %ymm2
 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: kmovw %eax, %k1
 ; X86-NEXT: vpconflictq %ymm0, %ymm1 {%k1}
-; X86-NEXT: vpconflictq %ymm0, %ymm0
+; X86-NEXT: vpconflictq %ymm0, %ymm0 {%k1} {z}
+; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0
 ; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: test_int_x86_avx512_mask_vpconflict_q_256:
 ; X64: # %bb.0:
+; X64-NEXT: vpconflictq %ymm0, %ymm2
 ; X64-NEXT: kmovw %edi, %k1
 ; X64-NEXT: vpconflictq %ymm0, %ymm1 {%k1}
-; X64-NEXT: vpconflictq %ymm0, %ymm0
+; X64-NEXT: vpconflictq %ymm0, %ymm0 {%k1} {z}
+; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0
 ; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0
 ; X64-NEXT: retq
- %res = call <4 x i64> @llvm.x86.avx512.mask.conflict.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2)
- %res1 = call <4 x i64> @llvm.x86.avx512.mask.conflict.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 -1)
- %res2 = add <4 x i64> %res, %res1
- ret <4 x i64> %res2
+ %1 = call <4 x i64> @llvm.x86.avx512.conflict.q.256(<4 x i64> %x0)
+ %2 = bitcast i8 %x2 to <8 x i1>
+ %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %3 = select <4 x i1> %extract1, <4 x i64> %1, <4 x i64> %x1
+ %4 = call <4 x i64> @llvm.x86.avx512.conflict.q.256(<4 x i64> %x0)
+ %5 = call <4 x i64> @llvm.x86.avx512.conflict.q.256(<4 x i64> %x0)
+ %6 = bitcast i8 %x2 to <8 x i1>
+ %extract = shufflevector <8 x i1> %6, <8 x i1> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %7 = select <4 x i1> %extract, <4 x i64> %5, <4 x i64> zeroinitializer
+ %res3 = add <4 x i64> %3, %4
+ %res4 = add <4 x i64> %7, %res3
+ ret <4 x i64> %res4
 }

+declare <4 x i32> @llvm.x86.avx512.conflict.d.128(<4 x i32>)
+declare <8 x i32> @llvm.x86.avx512.conflict.d.256(<8 x i32>)
+declare <2 x i64> @llvm.x86.avx512.conflict.q.128(<2 x i64>)
+declare <4 x i64> @llvm.x86.avx512.conflict.q.256(<4 x i64>)
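
Note on the upgrade pattern: as the avx512cd-intrinsics.ll tests above show, the autoupgrader rewrites a call to one of the removed masked intrinsics into the unmasked intrinsic from r352172 followed by a bitcast of the scalar mask and a vector select. A minimal sketch in LLVM IR — the value names %a, %passthru, and %mask are illustrative placeholders, not taken from the diff:

; before the upgrade: mask and passthru are intrinsic operands
  %res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> %passthru, i16 %mask)

; after the upgrade: unmasked intrinsic, masking expressed as a select
  %r = call <16 x i32> @llvm.x86.avx512.conflict.d.512(<16 x i32> %a)
  %m = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %m, <16 x i32> %r, <16 x i32> %passthru

For the 128-bit and 256-bit variants, the i8 mask is first bitcast to <8 x i1> and its low elements extracted with a shufflevector (e.g. <4 x i32> <i32 0, i32 1, i32 2, i32 3>) before the select, as exercised by the avx512cdvl-intrinsics.ll tests above.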