From e051fbaeb306e17b6e395aa29988479c44efa072 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 20 Jan 2019 17:36:22 +0000 Subject: [PATCH] [X86] Auto upgrade old style VPCOM/VPCOMU intrinsics to generic integer comparisons We were upgrading these to the new style VPCOM/VPCOMU intrinsics (which include the condition code immediate), but we'll be getting rid of those shortly, so convert these to generics first. This causes a couple of changes in the upgrade tests as signed/unsigned eq/ne are equivalent and we constant fold true/false codes; these changes are the same as what we already do for avx512 cmp/ucmp. Noticed while cleaning up vector integer comparison costs for PR40376. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@351690 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/AutoUpgrade.cpp | 69 +++++++++++++------ .../X86/xop-intrinsics-x86_64-upgrade.ll | 48 ++++++------- 2 files changed, 71 insertions(+), 46 deletions(-) diff --git a/lib/IR/AutoUpgrade.cpp b/lib/IR/AutoUpgrade.cpp index 321c8c3992f..b28e9f51507 100644 --- a/lib/IR/AutoUpgrade.cpp +++ b/lib/IR/AutoUpgrade.cpp @@ -1052,6 +1052,45 @@ static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallInst &CI, return Res; } +static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallInst &CI, unsigned Imm, + bool IsSigned) { + Type *Ty = CI.getType(); + Value *LHS = CI.getArgOperand(0); + Value *RHS = CI.getArgOperand(1); + + CmpInst::Predicate Pred; + switch (Imm) { + case 0x0: + Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; + break; + case 0x1: + Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; + break; + case 0x2: + Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; + break; + case 0x3: + Pred = IsSigned ? 
ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; + break; + case 0x4: + Pred = ICmpInst::ICMP_EQ; + break; + case 0x5: + Pred = ICmpInst::ICMP_NE; + break; + case 0x6: + return Constant::getNullValue(Ty); // FALSE + case 0x7: + return Constant::getAllOnesValue(Ty); // TRUE + default: + llvm_unreachable("Unknown XOP vpcom/vpcomu predicate"); + } + + Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS); + Value *Ext = Builder.CreateSExt(Cmp, Ty); + return Ext; +} + static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallInst &CI, bool IsShiftRight, bool ZeroMask) { Type *Ty = CI.getType(); @@ -1989,23 +2028,13 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { ResultTy); Rep = Builder.CreateCall(CSt, { CI->getArgOperand(1), Ptr, MaskVec }); } else if (IsX86 && Name.startswith("xop.vpcom")) { - Intrinsic::ID intID; - if (Name.endswith("ub")) - intID = Intrinsic::x86_xop_vpcomub; - else if (Name.endswith("uw")) - intID = Intrinsic::x86_xop_vpcomuw; - else if (Name.endswith("ud")) - intID = Intrinsic::x86_xop_vpcomud; - else if (Name.endswith("uq")) - intID = Intrinsic::x86_xop_vpcomuq; - else if (Name.endswith("b")) - intID = Intrinsic::x86_xop_vpcomb; - else if (Name.endswith("w")) - intID = Intrinsic::x86_xop_vpcomw; - else if (Name.endswith("d")) - intID = Intrinsic::x86_xop_vpcomd; - else if (Name.endswith("q")) - intID = Intrinsic::x86_xop_vpcomq; + bool IsSigned; + if (Name.endswith("ub") || Name.endswith("uw") || Name.endswith("ud") || + Name.endswith("uq")) + IsSigned = false; + else if (Name.endswith("b") || Name.endswith("w") || Name.endswith("d") || + Name.endswith("q")) + IsSigned = true; else llvm_unreachable("Unknown suffix"); @@ -2029,11 +2058,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { Imm = 7; else llvm_unreachable("Unknown condition"); - - Function *VPCOM = Intrinsic::getDeclaration(F->getParent(), intID); - Rep = - Builder.CreateCall(VPCOM, {CI->getArgOperand(0), CI->getArgOperand(1), - Builder.getInt8(Imm)}); + 
Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned); } else if (IsX86 && Name.startswith("xop.vpcmov")) { Value *Sel = CI->getArgOperand(2); Value *NotSel = Builder.CreateNot(Sel); diff --git a/test/CodeGen/X86/xop-intrinsics-x86_64-upgrade.ll b/test/CodeGen/X86/xop-intrinsics-x86_64-upgrade.ll index 0f348369b3a..a9f237ab728 100644 --- a/test/CodeGen/X86/xop-intrinsics-x86_64-upgrade.ll +++ b/test/CodeGen/X86/xop-intrinsics-x86_64-upgrade.ll @@ -129,7 +129,7 @@ declare <2 x i64> @llvm.x86.xop.vpcomeqq(<2 x i64>, <2 x i64>) nounwind readnone define <16 x i8> @test_int_x86_xop_vpcomequb(<16 x i8> %a0, <16 x i8> %a1) { ; CHECK-LABEL: test_int_x86_xop_vpcomequb: ; CHECK: # %bb.0: -; CHECK-NEXT: vpcomequb %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vpcomeqb %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: retq %res = call <16 x i8> @llvm.x86.xop.vpcomequb(<16 x i8> %a0, <16 x i8> %a1) ; ret <16 x i8> %res @@ -139,7 +139,7 @@ declare <16 x i8> @llvm.x86.xop.vpcomequb(<16 x i8>, <16 x i8>) nounwind readnon define <4 x i32> @test_int_x86_xop_vpcomequd(<4 x i32> %a0, <4 x i32> %a1) { ; CHECK-LABEL: test_int_x86_xop_vpcomequd: ; CHECK: # %bb.0: -; CHECK-NEXT: vpcomequd %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vpcomeqd %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: retq %res = call <4 x i32> @llvm.x86.xop.vpcomequd(<4 x i32> %a0, <4 x i32> %a1) ; ret <4 x i32> %res @@ -149,7 +149,7 @@ declare <4 x i32> @llvm.x86.xop.vpcomequd(<4 x i32>, <4 x i32>) nounwind readnon define <2 x i64> @test_int_x86_xop_vpcomequq(<2 x i64> %a0, <2 x i64> %a1) { ; CHECK-LABEL: test_int_x86_xop_vpcomequq: ; CHECK: # %bb.0: -; CHECK-NEXT: vpcomequq %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vpcomeqq %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: retq %res = call <2 x i64> @llvm.x86.xop.vpcomequq(<2 x i64> %a0, <2 x i64> %a1) ; ret <2 x i64> %res @@ -159,7 +159,7 @@ declare <2 x i64> @llvm.x86.xop.vpcomequq(<2 x i64>, <2 x i64>) nounwind readnon define <8 x i16> @test_int_x86_xop_vpcomequw(<8 x i16> %a0, <8 x i16> %a1) { ; CHECK-LABEL: test_int_x86_xop_vpcomequw: ; 
CHECK: # %bb.0: -; CHECK-NEXT: vpcomequw %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vpcomeqw %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: retq %res = call <8 x i16> @llvm.x86.xop.vpcomequw(<8 x i16> %a0, <8 x i16> %a1) ; ret <8 x i16> %res @@ -169,7 +169,7 @@ declare <8 x i16> @llvm.x86.xop.vpcomequw(<8 x i16>, <8 x i16>) nounwind readnon define <16 x i8> @test_int_x86_xop_vpcomfalseb(<16 x i8> %a0, <16 x i8> %a1) { ; CHECK-LABEL: test_int_x86_xop_vpcomfalseb: ; CHECK: # %bb.0: -; CHECK-NEXT: vpcomfalseb %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0 ; CHECK-NEXT: retq %res = call <16 x i8> @llvm.x86.xop.vpcomfalseb(<16 x i8> %a0, <16 x i8> %a1) ; ret <16 x i8> %res @@ -179,7 +179,7 @@ declare <16 x i8> @llvm.x86.xop.vpcomfalseb(<16 x i8>, <16 x i8>) nounwind readn define <4 x i32> @test_int_x86_xop_vpcomfalsed(<4 x i32> %a0, <4 x i32> %a1) { ; CHECK-LABEL: test_int_x86_xop_vpcomfalsed: ; CHECK: # %bb.0: -; CHECK-NEXT: vpcomfalsed %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0 ; CHECK-NEXT: retq %res = call <4 x i32> @llvm.x86.xop.vpcomfalsed(<4 x i32> %a0, <4 x i32> %a1) ; ret <4 x i32> %res @@ -189,7 +189,7 @@ declare <4 x i32> @llvm.x86.xop.vpcomfalsed(<4 x i32>, <4 x i32>) nounwind readn define <2 x i64> @test_int_x86_xop_vpcomfalseq(<2 x i64> %a0, <2 x i64> %a1) { ; CHECK-LABEL: test_int_x86_xop_vpcomfalseq: ; CHECK: # %bb.0: -; CHECK-NEXT: vpcomfalseq %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0 ; CHECK-NEXT: retq %res = call <2 x i64> @llvm.x86.xop.vpcomfalseq(<2 x i64> %a0, <2 x i64> %a1) ; ret <2 x i64> %res @@ -199,7 +199,7 @@ declare <2 x i64> @llvm.x86.xop.vpcomfalseq(<2 x i64>, <2 x i64>) nounwind readn define <16 x i8> @test_int_x86_xop_vpcomfalseub(<16 x i8> %a0, <16 x i8> %a1) { ; CHECK-LABEL: test_int_x86_xop_vpcomfalseub: ; CHECK: # %bb.0: -; CHECK-NEXT: vpcomfalseub %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0 ; CHECK-NEXT: retq %res = call <16 x i8> @llvm.x86.xop.vpcomfalseub(<16 x i8> %a0, <16 x i8> 
%a1) ; ret <16 x i8> %res @@ -209,7 +209,7 @@ declare <16 x i8> @llvm.x86.xop.vpcomfalseub(<16 x i8>, <16 x i8>) nounwind read define <4 x i32> @test_int_x86_xop_vpcomfalseud(<4 x i32> %a0, <4 x i32> %a1) { ; CHECK-LABEL: test_int_x86_xop_vpcomfalseud: ; CHECK: # %bb.0: -; CHECK-NEXT: vpcomfalseud %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0 ; CHECK-NEXT: retq %res = call <4 x i32> @llvm.x86.xop.vpcomfalseud(<4 x i32> %a0, <4 x i32> %a1) ; ret <4 x i32> %res @@ -219,7 +219,7 @@ declare <4 x i32> @llvm.x86.xop.vpcomfalseud(<4 x i32>, <4 x i32>) nounwind read define <2 x i64> @test_int_x86_xop_vpcomfalseuq(<2 x i64> %a0, <2 x i64> %a1) { ; CHECK-LABEL: test_int_x86_xop_vpcomfalseuq: ; CHECK: # %bb.0: -; CHECK-NEXT: vpcomfalseuq %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0 ; CHECK-NEXT: retq %res = call <2 x i64> @llvm.x86.xop.vpcomfalseuq(<2 x i64> %a0, <2 x i64> %a1) ; ret <2 x i64> %res @@ -229,7 +229,7 @@ declare <2 x i64> @llvm.x86.xop.vpcomfalseuq(<2 x i64>, <2 x i64>) nounwind read define <8 x i16> @test_int_x86_xop_vpcomfalseuw(<8 x i16> %a0, <8 x i16> %a1) { ; CHECK-LABEL: test_int_x86_xop_vpcomfalseuw: ; CHECK: # %bb.0: -; CHECK-NEXT: vpcomfalseuw %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0 ; CHECK-NEXT: retq %res = call <8 x i16> @llvm.x86.xop.vpcomfalseuw(<8 x i16> %a0, <8 x i16> %a1) ; ret <8 x i16> %res @@ -239,7 +239,7 @@ declare <8 x i16> @llvm.x86.xop.vpcomfalseuw(<8 x i16>, <8 x i16>) nounwind read define <8 x i16> @test_int_x86_xop_vpcomfalsew(<8 x i16> %a0, <8 x i16> %a1) { ; CHECK-LABEL: test_int_x86_xop_vpcomfalsew: ; CHECK: # %bb.0: -; CHECK-NEXT: vpcomfalsew %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0 ; CHECK-NEXT: retq %res = call <8 x i16> @llvm.x86.xop.vpcomfalsew(<8 x i16> %a0, <8 x i16> %a1) ; ret <8 x i16> %res @@ -599,7 +599,7 @@ declare <2 x i64> @llvm.x86.xop.vpcomneq(<2 x i64>, <2 x i64>) nounwind readnone define <16 x i8> @test_int_x86_xop_vpcomneub(<16 x i8> %a0, 
<16 x i8> %a1) { ; CHECK-LABEL: test_int_x86_xop_vpcomneub: ; CHECK: # %bb.0: -; CHECK-NEXT: vpcomnequb %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vpcomneqb %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: retq %res = call <16 x i8> @llvm.x86.xop.vpcomneub(<16 x i8> %a0, <16 x i8> %a1) ; ret <16 x i8> %res @@ -609,7 +609,7 @@ declare <16 x i8> @llvm.x86.xop.vpcomneub(<16 x i8>, <16 x i8>) nounwind readnon define <4 x i32> @test_int_x86_xop_vpcomneud(<4 x i32> %a0, <4 x i32> %a1) { ; CHECK-LABEL: test_int_x86_xop_vpcomneud: ; CHECK: # %bb.0: -; CHECK-NEXT: vpcomnequd %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vpcomneqd %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: retq %res = call <4 x i32> @llvm.x86.xop.vpcomneud(<4 x i32> %a0, <4 x i32> %a1) ; ret <4 x i32> %res @@ -619,7 +619,7 @@ declare <4 x i32> @llvm.x86.xop.vpcomneud(<4 x i32>, <4 x i32>) nounwind readnon define <2 x i64> @test_int_x86_xop_vpcomneuq(<2 x i64> %a0, <2 x i64> %a1) { ; CHECK-LABEL: test_int_x86_xop_vpcomneuq: ; CHECK: # %bb.0: -; CHECK-NEXT: vpcomnequq %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vpcomneqq %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: retq %res = call <2 x i64> @llvm.x86.xop.vpcomneuq(<2 x i64> %a0, <2 x i64> %a1) ; ret <2 x i64> %res @@ -629,7 +629,7 @@ declare <2 x i64> @llvm.x86.xop.vpcomneuq(<2 x i64>, <2 x i64>) nounwind readnon define <8 x i16> @test_int_x86_xop_vpcomneuw(<8 x i16> %a0, <8 x i16> %a1) { ; CHECK-LABEL: test_int_x86_xop_vpcomneuw: ; CHECK: # %bb.0: -; CHECK-NEXT: vpcomnequw %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vpcomneqw %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: retq %res = call <8 x i16> @llvm.x86.xop.vpcomneuw(<8 x i16> %a0, <8 x i16> %a1) ; ret <8 x i16> %res @@ -649,7 +649,7 @@ declare <8 x i16> @llvm.x86.xop.vpcomnew(<8 x i16>, <8 x i16>) nounwind readnone define <16 x i8> @test_int_x86_xop_vpcomtrueb(<16 x i8> %a0, <16 x i8> %a1) { ; CHECK-LABEL: test_int_x86_xop_vpcomtrueb: ; CHECK: # %bb.0: -; CHECK-NEXT: vpcomtrueb %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; CHECK-NEXT: retq %res = call <16 x i8> 
@llvm.x86.xop.vpcomtrueb(<16 x i8> %a0, <16 x i8> %a1) ; ret <16 x i8> %res @@ -659,7 +659,7 @@ declare <16 x i8> @llvm.x86.xop.vpcomtrueb(<16 x i8>, <16 x i8>) nounwind readno define <4 x i32> @test_int_x86_xop_vpcomtrued(<4 x i32> %a0, <4 x i32> %a1) { ; CHECK-LABEL: test_int_x86_xop_vpcomtrued: ; CHECK: # %bb.0: -; CHECK-NEXT: vpcomtrued %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; CHECK-NEXT: retq %res = call <4 x i32> @llvm.x86.xop.vpcomtrued(<4 x i32> %a0, <4 x i32> %a1) ; ret <4 x i32> %res @@ -669,7 +669,7 @@ declare <4 x i32> @llvm.x86.xop.vpcomtrued(<4 x i32>, <4 x i32>) nounwind readno define <2 x i64> @test_int_x86_xop_vpcomtrueq(<2 x i64> %a0, <2 x i64> %a1) { ; CHECK-LABEL: test_int_x86_xop_vpcomtrueq: ; CHECK: # %bb.0: -; CHECK-NEXT: vpcomtrueq %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; CHECK-NEXT: retq %res = call <2 x i64> @llvm.x86.xop.vpcomtrueq(<2 x i64> %a0, <2 x i64> %a1) ; ret <2 x i64> %res @@ -679,7 +679,7 @@ declare <2 x i64> @llvm.x86.xop.vpcomtrueq(<2 x i64>, <2 x i64>) nounwind readno define <16 x i8> @test_int_x86_xop_vpcomtrueub(<16 x i8> %a0, <16 x i8> %a1) { ; CHECK-LABEL: test_int_x86_xop_vpcomtrueub: ; CHECK: # %bb.0: -; CHECK-NEXT: vpcomtrueub %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; CHECK-NEXT: retq %res = call <16 x i8> @llvm.x86.xop.vpcomtrueub(<16 x i8> %a0, <16 x i8> %a1) ; ret <16 x i8> %res @@ -689,7 +689,7 @@ declare <16 x i8> @llvm.x86.xop.vpcomtrueub(<16 x i8>, <16 x i8>) nounwind readn define <4 x i32> @test_int_x86_xop_vpcomtrueud(<4 x i32> %a0, <4 x i32> %a1) { ; CHECK-LABEL: test_int_x86_xop_vpcomtrueud: ; CHECK: # %bb.0: -; CHECK-NEXT: vpcomtrueud %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; CHECK-NEXT: retq %res = call <4 x i32> @llvm.x86.xop.vpcomtrueud(<4 x i32> %a0, <4 x i32> %a1) ; ret <4 x i32> %res @@ -699,7 +699,7 @@ declare <4 x i32> @llvm.x86.xop.vpcomtrueud(<4 x i32>, <4 x i32>) nounwind readn define <2 x i64> 
@test_int_x86_xop_vpcomtrueuq(<2 x i64> %a0, <2 x i64> %a1) { ; CHECK-LABEL: test_int_x86_xop_vpcomtrueuq: ; CHECK: # %bb.0: -; CHECK-NEXT: vpcomtrueuq %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; CHECK-NEXT: retq %res = call <2 x i64> @llvm.x86.xop.vpcomtrueuq(<2 x i64> %a0, <2 x i64> %a1) ; ret <2 x i64> %res @@ -709,7 +709,7 @@ declare <2 x i64> @llvm.x86.xop.vpcomtrueuq(<2 x i64>, <2 x i64>) nounwind readn define <8 x i16> @test_int_x86_xop_vpcomtrueuw(<8 x i16> %a0, <8 x i16> %a1) { ; CHECK-LABEL: test_int_x86_xop_vpcomtrueuw: ; CHECK: # %bb.0: -; CHECK-NEXT: vpcomtrueuw %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; CHECK-NEXT: retq %res = call <8 x i16> @llvm.x86.xop.vpcomtrueuw(<8 x i16> %a0, <8 x i16> %a1) ; ret <8 x i16> %res @@ -719,7 +719,7 @@ declare <8 x i16> @llvm.x86.xop.vpcomtrueuw(<8 x i16>, <8 x i16>) nounwind readn define <8 x i16> @test_int_x86_xop_vpcomtruew(<8 x i16> %a0, <8 x i16> %a1) { ; CHECK-LABEL: test_int_x86_xop_vpcomtruew: ; CHECK: # %bb.0: -; CHECK-NEXT: vpcomtruew %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; CHECK-NEXT: retq %res = call <8 x i16> @llvm.x86.xop.vpcomtruew(<8 x i16> %a0, <8 x i16> %a1) ; ret <8 x i16> %res -- 2.50.1