From e5e77e4a92c4a808c763f9de7149738dd214add4 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 18 Nov 2016 05:04:44 +0000 Subject: [PATCH] [AVX-512] Replace masked 16-bit element variable shift intrinsics with new unmasked versions and selects. The same thing was done to 32-bit and 64-bit element sizes previously. This will allow us to support these shifts in InstCombineCalls along with the other variable shift intrinsics. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@287312 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IR/IntrinsicsX86.td | 56 +++++++++++------------ lib/IR/AutoUpgrade.cpp | 43 ++++++++++------- lib/Target/X86/X86IntrinsicsInfo.h | 18 ++++---- test/CodeGen/X86/avx512bw-intrinsics.ll | 48 +++++++++---------- test/CodeGen/X86/avx512bwvl-intrinsics.ll | 48 +++++++++---------- 5 files changed, 112 insertions(+), 101 deletions(-) diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td index fa995bc5a12..dacb60c94fd 100644 --- a/include/llvm/IR/IntrinsicsX86.td +++ b/include/llvm/IR/IntrinsicsX86.td @@ -1844,16 +1844,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_psllv16_hi : GCCBuiltin<"__builtin_ia32_psllv16hi_mask">, - Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, - llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; - def int_x86_avx512_mask_psllv32hi : GCCBuiltin<"__builtin_ia32_psllv32hi_mask">, - Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, - llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_psllv8_hi : GCCBuiltin<"__builtin_ia32_psllv8hi_mask">, - Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, - llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_pmultishift_qb_128: GCCBuiltin<"__builtin_ia32_vpmultishiftqb128_mask">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, @@ -2320,25 +2310,35 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty], [IntrNoMem]>; - def int_x86_avx512_mask_psrav16_hi : GCCBuiltin<"__builtin_ia32_psrav16hi_mask">, - Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, - llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; - def int_x86_avx512_mask_psrav32_hi : GCCBuiltin<"__builtin_ia32_psrav32hi_mask">, - Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, - llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_psrav8_hi : GCCBuiltin<"__builtin_ia32_psrav8hi_mask">, - Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, - llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_psllv_w_128 : GCCBuiltin<"__builtin_ia32_psllv8hi">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; + def int_x86_avx512_psllv_w_256 : GCCBuiltin<"__builtin_ia32_psllv16hi">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], + [IntrNoMem]>; + def int_x86_avx512_psllv_w_512 : GCCBuiltin<"__builtin_ia32_psllv32hi">, + Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty], + [IntrNoMem]>; - def int_x86_avx512_mask_psrlv16_hi : GCCBuiltin<"__builtin_ia32_psrlv16hi_mask">, - Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, - llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; - def int_x86_avx512_mask_psrlv32hi : GCCBuiltin<"__builtin_ia32_psrlv32hi_mask">, - Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, - llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_psrlv8_hi : GCCBuiltin<"__builtin_ia32_psrlv8hi_mask">, - Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, - llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_psrlv_w_128 : GCCBuiltin<"__builtin_ia32_psrlv8hi">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; + def int_x86_avx512_psrlv_w_256 : GCCBuiltin<"__builtin_ia32_psrlv16hi">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], + [IntrNoMem]>; + def int_x86_avx512_psrlv_w_512 : GCCBuiltin<"__builtin_ia32_psrlv32hi">, + Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty], + [IntrNoMem]>; + + def int_x86_avx512_psrav_w_128 : GCCBuiltin<"__builtin_ia32_psrav8hi">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; + def int_x86_avx512_psrav_w_256 : GCCBuiltin<"__builtin_ia32_psrav16hi">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], + [IntrNoMem]>; + def int_x86_avx512_psrav_w_512 : GCCBuiltin<"__builtin_ia32_psrav32hi">, + Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty], + [IntrNoMem]>; def int_x86_avx512_mask_prorv_d_128 : GCCBuiltin<"__builtin_ia32_prorvd128_mask">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, diff --git a/lib/IR/AutoUpgrade.cpp b/lib/IR/AutoUpgrade.cpp index 18f2c17c7c9..93d5d921801 100644 --- a/lib/IR/AutoUpgrade.cpp +++ b/lib/IR/AutoUpgrade.cpp @@ -325,19 +325,9 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { Name.startswith("avx512.mask.pslli") || // Added in 4.0 Name.startswith("avx512.mask.psrai") || // Added in 4.0 Name.startswith("avx512.mask.psrli") || // Added in 4.0 - Name == "avx512.mask.psllv2.di" || // Added in 4.0 - Name == "avx512.mask.psllv4.di" || // Added in 4.0 - Name == "avx512.mask.psllv4.si" || // Added in 4.0 - Name == "avx512.mask.psllv8.si" || // Added in 4.0 - Name == "avx512.mask.psrav4.si" || // Added in 4.0 - Name == "avx512.mask.psrav8.si" || // Added in 4.0 - Name == "avx512.mask.psrlv2.di" || // Added in 4.0 - Name == "avx512.mask.psrlv4.di" || // 
Added in 4.0 - Name == "avx512.mask.psrlv4.si" || // Added in 4.0 - Name == "avx512.mask.psrlv8.si" || // Added in 4.0 - Name.startswith("avx512.mask.psllv.") || // Added in 4.0 - Name.startswith("avx512.mask.psrav.") || // Added in 4.0 - Name.startswith("avx512.mask.psrlv.") || // Added in 4.0 + Name.startswith("avx512.mask.psllv") || // Added in 4.0 + Name.startswith("avx512.mask.psrav") || // Added in 4.0 + Name.startswith("avx512.mask.psrlv") || // Added in 4.0 Name.startswith("sse41.pmovsx") || // Added in 3.8 Name.startswith("sse41.pmovzx") || // Added in 3.9 Name.startswith("avx2.pmovsx") || // Added in 3.9 @@ -1439,7 +1429,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { bool IsVariable = Name[16] == 'v'; char Size = Name[16] == '.' ? Name[17] : Name[17] == '.' ? Name[18] : - Name[19]; + Name[18] == '.' ? Name[19] : + Name[20]; Intrinsic::ID IID; if (IsVariable && Name[17] != '.') { @@ -1451,6 +1442,12 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { IID = Intrinsic::x86_avx2_psllv_d; else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si IID = Intrinsic::x86_avx2_psllv_d_256; + else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi + IID = Intrinsic::x86_avx512_psllv_w_128; + else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi + IID = Intrinsic::x86_avx512_psllv_w_256; + else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi + IID = Intrinsic::x86_avx512_psllv_w_512; else llvm_unreachable("Unexpected size"); } else if (Name.endswith(".128")) { @@ -1500,7 +1497,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { bool IsVariable = Name[16] == 'v'; char Size = Name[16] == '.' ? Name[17] : Name[17] == '.' ? Name[18] : - Name[19]; + Name[18] == '.' ? Name[19] : + Name[20]; Intrinsic::ID IID; if (IsVariable && Name[17] != '.') { @@ -1512,6 +1510,12 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { IID = Intrinsic::x86_avx2_psrlv_d; else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si IID = Intrinsic::x86_avx2_psrlv_d_256; + else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi + IID = Intrinsic::x86_avx512_psrlv_w_128; + else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi + IID = Intrinsic::x86_avx512_psrlv_w_256; + else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi + IID = Intrinsic::x86_avx512_psrlv_w_512; else llvm_unreachable("Unexpected size"); } else if (Name.endswith(".128")) { @@ -1561,7 +1565,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { bool IsVariable = Name[16] == 'v'; char Size = Name[16] == '.' ? Name[17] : Name[17] == '.' ? Name[18] : - Name[19]; + Name[18] == '.' ? 
Name[19] : + Name[20]; Intrinsic::ID IID; if (IsVariable && Name[17] != '.') { @@ -1569,6 +1574,12 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { IID = Intrinsic::x86_avx2_psrav_d; else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si IID = Intrinsic::x86_avx2_psrav_d_256; + else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi + IID = Intrinsic::x86_avx512_psrav_w_128; + else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi + IID = Intrinsic::x86_avx512_psrav_w_256; + else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi + IID = Intrinsic::x86_avx512_psrav_w_512; else llvm_unreachable("Unexpected size"); } else if (Name.endswith(".128")) { diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h index a6b1d29e8f9..3a0e7f101e4 100644 --- a/lib/Target/X86/X86IntrinsicsInfo.h +++ b/lib/Target/X86/X86IntrinsicsInfo.h @@ -1057,15 +1057,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_prorv_q_512, INTR_TYPE_2OP_MASK, ISD::ROTR, 0), X86_INTRINSIC_DATA(avx512_mask_pshuf_b_512, INTR_TYPE_2OP_MASK, X86ISD::PSHUFB, 0), - X86_INTRINSIC_DATA(avx512_mask_psllv16_hi, INTR_TYPE_2OP_MASK, ISD::SHL, 0), - X86_INTRINSIC_DATA(avx512_mask_psllv32hi, INTR_TYPE_2OP_MASK, ISD::SHL, 0), - X86_INTRINSIC_DATA(avx512_mask_psllv8_hi, INTR_TYPE_2OP_MASK, ISD::SHL, 0), - X86_INTRINSIC_DATA(avx512_mask_psrav16_hi, INTR_TYPE_2OP_MASK, X86ISD::VSRAV, 0), - X86_INTRINSIC_DATA(avx512_mask_psrav32_hi, INTR_TYPE_2OP_MASK, X86ISD::VSRAV, 0), - X86_INTRINSIC_DATA(avx512_mask_psrav8_hi, INTR_TYPE_2OP_MASK, X86ISD::VSRAV, 0), - X86_INTRINSIC_DATA(avx512_mask_psrlv16_hi, INTR_TYPE_2OP_MASK, ISD::SRL, 0), - X86_INTRINSIC_DATA(avx512_mask_psrlv32hi, INTR_TYPE_2OP_MASK, ISD::SRL, 0), - X86_INTRINSIC_DATA(avx512_mask_psrlv8_hi, INTR_TYPE_2OP_MASK, ISD::SRL, 0), X86_INTRINSIC_DATA(avx512_mask_psubs_b_128, INTR_TYPE_2OP_MASK, X86ISD::SUBS, 0), X86_INTRINSIC_DATA(avx512_mask_psubs_b_256, INTR_TYPE_2OP_MASK, X86ISD::SUBS, 0), X86_INTRINSIC_DATA(avx512_mask_psubs_b_512, INTR_TYPE_2OP_MASK, X86ISD::SUBS, 0), @@ -1493,6 +1484,9 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_pslli_w_512, VSHIFT, X86ISD::VSHLI, 0), X86_INTRINSIC_DATA(avx512_psllv_d_512, INTR_TYPE_2OP, ISD::SHL, 0), X86_INTRINSIC_DATA(avx512_psllv_q_512, INTR_TYPE_2OP, ISD::SHL, 0), + X86_INTRINSIC_DATA(avx512_psllv_w_128, INTR_TYPE_2OP, ISD::SHL, 0), + X86_INTRINSIC_DATA(avx512_psllv_w_256, INTR_TYPE_2OP, ISD::SHL, 0), + X86_INTRINSIC_DATA(avx512_psllv_w_512, INTR_TYPE_2OP, ISD::SHL, 0), X86_INTRINSIC_DATA(avx512_psra_d_512, INTR_TYPE_2OP, X86ISD::VSRA, 0), X86_INTRINSIC_DATA(avx512_psra_q_128, INTR_TYPE_2OP, X86ISD::VSRA, 0), X86_INTRINSIC_DATA(avx512_psra_q_256, INTR_TYPE_2OP, X86ISD::VSRA, 0), @@ -1507,6 +1501,9 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_psrav_q_128, INTR_TYPE_2OP, X86ISD::VSRAV, 0), X86_INTRINSIC_DATA(avx512_psrav_q_256, INTR_TYPE_2OP, X86ISD::VSRAV, 0), X86_INTRINSIC_DATA(avx512_psrav_q_512, INTR_TYPE_2OP, X86ISD::VSRAV, 0), + X86_INTRINSIC_DATA(avx512_psrav_w_128, INTR_TYPE_2OP, X86ISD::VSRAV, 0), + X86_INTRINSIC_DATA(avx512_psrav_w_256, INTR_TYPE_2OP, X86ISD::VSRAV, 0), + X86_INTRINSIC_DATA(avx512_psrav_w_512, INTR_TYPE_2OP, X86ISD::VSRAV, 0), X86_INTRINSIC_DATA(avx512_psrl_d_512, INTR_TYPE_2OP, X86ISD::VSRL, 0), X86_INTRINSIC_DATA(avx512_psrl_q_512, INTR_TYPE_2OP, X86ISD::VSRL, 0), X86_INTRINSIC_DATA(avx512_psrl_w_512, 
INTR_TYPE_2OP, X86ISD::VSRL, 0), @@ -1515,6 +1512,9 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_psrli_w_512, VSHIFT, X86ISD::VSRLI, 0), X86_INTRINSIC_DATA(avx512_psrlv_d_512, INTR_TYPE_2OP, ISD::SRL, 0), X86_INTRINSIC_DATA(avx512_psrlv_q_512, INTR_TYPE_2OP, ISD::SRL, 0), + X86_INTRINSIC_DATA(avx512_psrlv_w_128, INTR_TYPE_2OP, ISD::SRL, 0), + X86_INTRINSIC_DATA(avx512_psrlv_w_256, INTR_TYPE_2OP, ISD::SRL, 0), + X86_INTRINSIC_DATA(avx512_psrlv_w_512, INTR_TYPE_2OP, ISD::SRL, 0), X86_INTRINSIC_DATA(avx512_ptestm_b_128, CMP_MASK, X86ISD::TESTM, 0), X86_INTRINSIC_DATA(avx512_ptestm_b_256, CMP_MASK, X86ISD::TESTM, 0), X86_INTRINSIC_DATA(avx512_ptestm_b_512, CMP_MASK, X86ISD::TESTM, 0), diff --git a/test/CodeGen/X86/avx512bw-intrinsics.ll b/test/CodeGen/X86/avx512bw-intrinsics.ll index acd0fb07e9d..a7658e2c109 100644 --- a/test/CodeGen/X86/avx512bw-intrinsics.ll +++ b/test/CodeGen/X86/avx512bw-intrinsics.ll @@ -2288,22 +2288,22 @@ declare <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16>, <32 x i16>, <32 x define <32 x i16>@test_int_x86_avx512_mask_psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { ; AVX512BW-LABEL: test_int_x86_avx512_mask_psrlv32hi: ; AVX512BW: ## BB#0: +; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm3 ; AVX512BW-NEXT: kmovd %edi, %k1 ; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm2 {%k1} -; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm3 {%k1} {z} -; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1 -; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0 +; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0 {%k1} {z} +; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0 +; AVX512BW-NEXT: vpaddw %zmm3, %zmm0, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512F-32-LABEL: test_int_x86_avx512_mask_psrlv32hi: ; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: vpsrlvw %zmm1, %zmm0, %zmm3 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 ; AVX512F-32-NEXT: vpsrlvw %zmm1, %zmm0, %zmm2 {%k1} -; AVX512F-32-NEXT: vpsrlvw %zmm1, %zmm0, %zmm3 {%k1} {z} -; AVX512F-32-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0 -; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm1 -; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0 {%k1} {z} +; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0 +; AVX512F-32-NEXT: vpaddw %zmm3, %zmm0, %zmm0 ; AVX512F-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) %res1 = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3) @@ -2318,22 +2318,22 @@ declare <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16>, <32 x i16>, <32 define <32 x i16>@test_int_x86_avx512_mask_psrav32_hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { ; AVX512BW-LABEL: test_int_x86_avx512_mask_psrav32_hi: ; AVX512BW: ## BB#0: +; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm3 ; AVX512BW-NEXT: kmovd %edi, %k1 ; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm2 {%k1} -; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm3 {%k1} {z} -; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1 -; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0 +; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm0 {%k1} {z} +; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0 +; AVX512BW-NEXT: vpaddw %zmm3, %zmm0, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512F-32-LABEL: test_int_x86_avx512_mask_psrav32_hi: ; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: vpsravw %zmm1, %zmm0, %zmm3 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 ; 
AVX512F-32-NEXT: vpsravw %zmm1, %zmm0, %zmm2 {%k1} -; AVX512F-32-NEXT: vpsravw %zmm1, %zmm0, %zmm3 {%k1} {z} -; AVX512F-32-NEXT: vpsravw %zmm1, %zmm0, %zmm0 -; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm1 -; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: vpsravw %zmm1, %zmm0, %zmm0 {%k1} {z} +; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0 +; AVX512F-32-NEXT: vpaddw %zmm3, %zmm0, %zmm0 ; AVX512F-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) %res1 = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3) @@ -2366,22 +2366,22 @@ declare <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16>, <32 x i16>, <32 x define <32 x i16>@test_int_x86_avx512_mask_psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { ; AVX512BW-LABEL: test_int_x86_avx512_mask_psllv32hi: ; AVX512BW: ## BB#0: +; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm3 ; AVX512BW-NEXT: kmovd %edi, %k1 ; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm2 {%k1} -; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm3 {%k1} {z} -; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1 -; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0 +; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0 {%k1} {z} +; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0 +; AVX512BW-NEXT: vpaddw %zmm3, %zmm0, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512F-32-LABEL: test_int_x86_avx512_mask_psllv32hi: ; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: vpsllvw %zmm1, %zmm0, %zmm3 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 ; AVX512F-32-NEXT: vpsllvw %zmm1, %zmm0, %zmm2 {%k1} -; AVX512F-32-NEXT: vpsllvw %zmm1, %zmm0, %zmm3 {%k1} {z} -; AVX512F-32-NEXT: vpsllvw %zmm1, %zmm0, %zmm0 -; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm1 -; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: vpsllvw %zmm1, %zmm0, %zmm0 {%k1} {z} +; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0 +; AVX512F-32-NEXT: vpaddw %zmm3, %zmm0, %zmm0 ; AVX512F-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) %res1 = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3) diff --git a/test/CodeGen/X86/avx512bwvl-intrinsics.ll b/test/CodeGen/X86/avx512bwvl-intrinsics.ll index 514d3157b7d..5eacf2279f2 100644 --- a/test/CodeGen/X86/avx512bwvl-intrinsics.ll +++ b/test/CodeGen/X86/avx512bwvl-intrinsics.ll @@ -4440,12 +4440,12 @@ declare <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16>, <16 x i16>, <16 define <16 x i16>@test_int_x86_avx512_mask_psrlv16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_psrlv16_hi: ; CHECK: ## BB#0: +; CHECK-NEXT: vpsrlvw %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf2,0xfd,0x28,0x10,0xd9] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpsrlvw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x10,0xd1] -; CHECK-NEXT: vpsrlvw %ymm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x10,0xd9] -; CHECK-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x10,0xc1] -; CHECK-NEXT: vpaddw %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0x6d,0x28,0xfd,0xcb] -; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfd,0xc0] +; CHECK-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x10,0xc1] +; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 
## encoding: [0x62,0xf1,0x6d,0x28,0xfd,0xc0] +; CHECK-NEXT: vpaddw %ymm3, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfd,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) %res1 = call <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3) @@ -4460,12 +4460,12 @@ declare <8 x i16> @llvm.x86.avx512.mask.psrlv8.hi(<8 x i16>, <8 x i16>, <8 x i16 define <8 x i16>@test_int_x86_avx512_mask_psrlv8_hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_psrlv8_hi: ; CHECK: ## BB#0: +; CHECK-NEXT: vpsrlvw %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf2,0xfd,0x08,0x10,0xd9] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpsrlvw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x10,0xd1] -; CHECK-NEXT: vpsrlvw %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x10,0xd9] -; CHECK-NEXT: vpsrlvw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x10,0xc1] -; CHECK-NEXT: vpaddw %xmm3, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xcb] -; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfd,0xc0] +; CHECK-NEXT: vpsrlvw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x10,0xc1] +; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xc0] +; CHECK-NEXT: vpaddw %xmm3, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfd,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.psrlv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) %res1 = call <8 x i16> @llvm.x86.avx512.mask.psrlv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3) @@ -4480,12 +4480,12 @@ declare <16 x i16> @llvm.x86.avx512.mask.psrav16.hi(<16 x i16>, <16 x i16>, <16 define <16 x i16>@test_int_x86_avx512_mask_psrav16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_psrav16_hi: ; CHECK: ## BB#0: +; CHECK-NEXT: vpsravw %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf2,0xfd,0x28,0x11,0xd9] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpsravw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x11,0xd1] -; CHECK-NEXT: vpsravw %ymm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x11,0xd9] -; CHECK-NEXT: vpsravw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x11,0xc1] -; CHECK-NEXT: vpaddw %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0x6d,0x28,0xfd,0xcb] -; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfd,0xc0] +; CHECK-NEXT: vpsravw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x11,0xc1] +; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfd,0xc0] +; CHECK-NEXT: vpaddw %ymm3, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfd,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.psrav16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) %res1 = call <16 x i16> @llvm.x86.avx512.mask.psrav16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3) @@ -4500,12 +4500,12 @@ declare <8 x i16> @llvm.x86.avx512.mask.psrav8.hi(<8 x i16>, <8 x i16>, <8 x i16 define <8 x i16>@test_int_x86_avx512_mask_psrav8_hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_psrav8_hi: ; CHECK: ## 
BB#0: +; CHECK-NEXT: vpsravw %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf2,0xfd,0x08,0x11,0xd9] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpsravw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x11,0xd1] -; CHECK-NEXT: vpsravw %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x11,0xd9] -; CHECK-NEXT: vpsravw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x11,0xc1] -; CHECK-NEXT: vpaddw %xmm3, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xcb] -; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfd,0xc0] +; CHECK-NEXT: vpsravw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x11,0xc1] +; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xc0] +; CHECK-NEXT: vpaddw %xmm3, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfd,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.psrav8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) %res1 = call <8 x i16> @llvm.x86.avx512.mask.psrav8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3) @@ -4520,12 +4520,12 @@ declare <16 x i16> @llvm.x86.avx512.mask.psllv16.hi(<16 x i16>, <16 x i16>, <16 define <16 x i16>@test_int_x86_avx512_mask_psllv16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_psllv16_hi: ; CHECK: ## BB#0: +; CHECK-NEXT: vpsllvw %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf2,0xfd,0x28,0x12,0xd9] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpsllvw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x12,0xd1] -; CHECK-NEXT: vpsllvw %ymm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x12,0xd9] -; CHECK-NEXT: vpsllvw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x12,0xc1] -; CHECK-NEXT: vpaddw %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0x6d,0x28,0xfd,0xcb] -; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfd,0xc0] +; CHECK-NEXT: vpsllvw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x12,0xc1] +; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfd,0xc0] +; CHECK-NEXT: vpaddw %ymm3, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfd,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.psllv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) %res1 = call <16 x i16> @llvm.x86.avx512.mask.psllv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3) @@ -4540,12 +4540,12 @@ declare <8 x i16> @llvm.x86.avx512.mask.psllv8.hi(<8 x i16>, <8 x i16>, <8 x i16 define <8 x i16>@test_int_x86_avx512_mask_psllv8_hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_psllv8_hi: ; CHECK: ## BB#0: +; CHECK-NEXT: vpsllvw %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf2,0xfd,0x08,0x12,0xd9] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpsllvw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x12,0xd1] -; CHECK-NEXT: vpsllvw %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x12,0xd9] -; CHECK-NEXT: vpsllvw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x12,0xc1] -; CHECK-NEXT: vpaddw %xmm3, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xcb] -; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfd,0xc0] +; CHECK-NEXT: vpsllvw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: 
[0x62,0xf2,0xfd,0x89,0x12,0xc1] +; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xc0] +; CHECK-NEXT: vpaddw %xmm3, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfd,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.psllv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) %res1 = call <8 x i16> @llvm.x86.avx512.mask.psllv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3) -- 2.40.0
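
The net effect of this change is visible in the AutoUpgrade.cpp hunks: calls to the old masked 16-bit variable shift intrinsics are rewritten into the new two-operand intrinsics followed by a select on the mask operand. Below is a minimal LLVM IR sketch of that before/after shape for the 128-bit left shift; the value names %x0, %x1, %passthru, and %mask are illustrative only, and the exact sequence the upgrader emits may differ in detail.

; Before upgrade: the masked intrinsic carries the pass-through vector and the mask.
  %old = call <8 x i16> @llvm.x86.avx512.mask.psllv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %passthru, i8 %mask)

; After upgrade: an unmasked shift, then a select keyed on the mask bits.
  %shift = call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> %x0, <8 x i16> %x1)
  %mvec  = bitcast i8 %mask to <8 x i1>
  %new   = select <8 x i1> %mvec, <8 x i16> %shift, <8 x i16> %passthru

Each test calls the intrinsic with a real mask and pass-through, with a zeroinitializer pass-through, and with an all-ones mask; after the upgrade these lower to the masked ({%k1}), zero-masked ({%k1} {z}), and plain forms of vpsllvw/vpsrlvw/vpsravw seen in the updated CHECK lines.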