From: Simon Pilgrim Date: Sun, 20 Jan 2019 16:40:33 +0000 (+0000) Subject: [X86] Replace VPCOM/VPCOMU with generic integer comparisons (clang) X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=70faccd01059ccabbad982410d9f8c455ff90bec;p=clang [X86] Replace VPCOM/VPCOMU with generic integer comparisons (clang) These intrinsics can always be replaced with generic integer comparisons without any regression in codegen, even for -O0/-fast-isel cases. Noticed while cleaning up vector integer comparison costs for PR40376. A future commit will remove/autoupgrade the existing VPCOM/VPCOMU llvm intrinsics. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@351687 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp index 6732915413..390d28fd08 100644 --- a/lib/CodeGen/CGBuiltin.cpp +++ b/lib/CodeGen/CGBuiltin.cpp @@ -9198,6 +9198,46 @@ static Value *EmitX86FunnelShift(CodeGenFunction &CGF, Value *Op0, Value *Op1, return CGF.Builder.CreateCall(F, {Op0, Op1, Amt}); } +static Value *EmitX86vpcom(CodeGenFunction &CGF, ArrayRef<Value *> Ops, + bool IsSigned) { + Value *Op0 = Ops[0]; + Value *Op1 = Ops[1]; + llvm::Type *Ty = Op0->getType(); + uint64_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7; + + CmpInst::Predicate Pred; + switch (Imm) { + case 0x0: + Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; + break; + case 0x1: + Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; + break; + case 0x2: + Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; + break; + case 0x3: + Pred = IsSigned ?
ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; + break; + case 0x4: + Pred = ICmpInst::ICMP_EQ; + break; + case 0x5: + Pred = ICmpInst::ICMP_NE; + break; + case 0x6: + return llvm::Constant::getNullValue(Ty); // FALSE + case 0x7: + return llvm::Constant::getAllOnesValue(Ty); // TRUE + default: + llvm_unreachable("Unexpected XOP vpcom/vpcomu predicate"); + } + + Value *Cmp = CGF.Builder.CreateICmp(Pred, Op0, Op1); + Value *Res = CGF.Builder.CreateSExt(Cmp, Ty); + return Res; +} + static Value *EmitX86Select(CodeGenFunction &CGF, Value *Mask, Value *Op0, Value *Op1) { @@ -10928,6 +10968,16 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7; return EmitX86MaskedCompare(*this, CC, false, Ops); } + case X86::BI__builtin_ia32_vpcomb: + case X86::BI__builtin_ia32_vpcomw: + case X86::BI__builtin_ia32_vpcomd: + case X86::BI__builtin_ia32_vpcomq: + return EmitX86vpcom(*this, Ops, true); + case X86::BI__builtin_ia32_vpcomub: + case X86::BI__builtin_ia32_vpcomuw: + case X86::BI__builtin_ia32_vpcomud: + case X86::BI__builtin_ia32_vpcomuq: + return EmitX86vpcom(*this, Ops, false); case X86::BI__builtin_ia32_kortestcqi: case X86::BI__builtin_ia32_kortestchi: diff --git a/test/CodeGen/xop-builtins-cmp.c b/test/CodeGen/xop-builtins-cmp.c index a805352ad3..4fbe6d0e70 100644 --- a/test/CodeGen/xop-builtins-cmp.c +++ b/test/CodeGen/xop-builtins-cmp.c @@ -8,49 +8,57 @@ __m128i test_mm_comlt_epu8(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comlt_epu8 - // CHECK: call <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i8 0) + // CHECK: icmp ult <16 x i8> %{{.*}}, %{{.*}} + // CHECK: sext <16 x i1> %{{.*}} to <16 x i8> return _mm_comlt_epu8(a, b); } __m128i test_mm_comlt_epu16(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comlt_epu16 - // CHECK: call <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i8 0) + // CHECK: icmp ult <8 x i16> %{{.*}}, %{{.*}} + // CHECK: sext <8 x i1> %{{.*}}
to <8 x i16> return _mm_comlt_epu16(a, b); } __m128i test_mm_comlt_epu32(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comlt_epu32 - // CHECK: call <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i8 0) + // CHECK: icmp ult <4 x i32> %{{.*}}, %{{.*}} + // CHECK: sext <4 x i1> %{{.*}} to <4 x i32> return _mm_comlt_epu32(a, b); } __m128i test_mm_comlt_epu64(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comlt_epu64 - // CHECK: call <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i8 0) + // CHECK: icmp ult <2 x i64> %{{.*}}, %{{.*}} + // CHECK: sext <2 x i1> %{{.*}} to <2 x i64> return _mm_comlt_epu64(a, b); } __m128i test_mm_comlt_epi8(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comlt_epi8 - // CHECK: call <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i8 0) + // CHECK: icmp slt <16 x i8> %{{.*}}, %{{.*}} + // CHECK: sext <16 x i1> %{{.*}} to <16 x i8> return _mm_comlt_epi8(a, b); } __m128i test_mm_comlt_epi16(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comlt_epi16 - // CHECK: call <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i8 0) + // CHECK: icmp slt <8 x i16> %{{.*}}, %{{.*}} + // CHECK: sext <8 x i1> %{{.*}} to <8 x i16> return _mm_comlt_epi16(a, b); } __m128i test_mm_comlt_epi32(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comlt_epi32 - // CHECK: call <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i8 0) + // CHECK: icmp slt <4 x i32> %{{.*}}, %{{.*}} + // CHECK: sext <4 x i1> %{{.*}} to <4 x i32> return _mm_comlt_epi32(a, b); } __m128i test_mm_comlt_epi64(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comlt_epi64 - // CHECK: call <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i8 0) + // CHECK: icmp slt <2 x i64> %{{.*}}, %{{.*}} + // CHECK: sext <2 x i1> %{{.*}} to <2 x i64> return _mm_comlt_epi64(a, b); } @@ -58,49 +66,57 @@ __m128i test_mm_comlt_epi64(__m128i a, __m128i b) { __m128i test_mm_comle_epu8(__m128i a, 
__m128i b) { // CHECK-LABEL: test_mm_comle_epu8 - // CHECK: call <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i8 1) + // CHECK: icmp ule <16 x i8> %{{.*}}, %{{.*}} + // CHECK: sext <16 x i1> %{{.*}} to <16 x i8> return _mm_comle_epu8(a, b); } __m128i test_mm_comle_epu16(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comle_epu16 - // CHECK: call <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i8 1) + // CHECK: icmp ule <8 x i16> %{{.*}}, %{{.*}} + // CHECK: sext <8 x i1> %{{.*}} to <8 x i16> return _mm_comle_epu16(a, b); } __m128i test_mm_comle_epu32(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comle_epu32 - // CHECK: call <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i8 1) + // CHECK: icmp ule <4 x i32> %{{.*}}, %{{.*}} + // CHECK: sext <4 x i1> %{{.*}} to <4 x i32> return _mm_comle_epu32(a, b); } __m128i test_mm_comle_epu64(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comle_epu64 - // CHECK: call <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i8 1) + // CHECK: icmp ule <2 x i64> %{{.*}}, %{{.*}} + // CHECK: sext <2 x i1> %{{.*}} to <2 x i64> return _mm_comle_epu64(a, b); } __m128i test_mm_comle_epi8(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comle_epi8 - // CHECK: call <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i8 1) + // CHECK: icmp sle <16 x i8> %{{.*}}, %{{.*}} + // CHECK: sext <16 x i1> %{{.*}} to <16 x i8> return _mm_comle_epi8(a, b); } __m128i test_mm_comle_epi16(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comle_epi16 - // CHECK: call <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i8 1) + // CHECK: icmp sle <8 x i16> %{{.*}}, %{{.*}} + // CHECK: sext <8 x i1> %{{.*}} to <8 x i16> return _mm_comle_epi16(a, b); } __m128i test_mm_comle_epi32(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comle_epi32 - // CHECK: call <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i8 1) + // CHECK: 
icmp sle <4 x i32> %{{.*}}, %{{.*}} + // CHECK: sext <4 x i1> %{{.*}} to <4 x i32> return _mm_comle_epi32(a, b); } __m128i test_mm_comle_epi64(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comle_epi64 - // CHECK: call <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i8 1) + // CHECK: icmp sle <2 x i64> %{{.*}}, %{{.*}} + // CHECK: sext <2 x i1> %{{.*}} to <2 x i64> return _mm_comle_epi64(a, b); } @@ -108,49 +124,57 @@ __m128i test_mm_comle_epi64(__m128i a, __m128i b) { __m128i test_mm_comgt_epu8(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comgt_epu8 - // CHECK: call <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i8 2) + // CHECK: icmp ugt <16 x i8> %{{.*}}, %{{.*}} + // CHECK: sext <16 x i1> %{{.*}} to <16 x i8> return _mm_comgt_epu8(a, b); } __m128i test_mm_comgt_epu16(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comgt_epu16 - // CHECK: call <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i8 2) + // CHECK: icmp ugt <8 x i16> %{{.*}}, %{{.*}} + // CHECK: sext <8 x i1> %{{.*}} to <8 x i16> return _mm_comgt_epu16(a, b); } __m128i test_mm_comgt_epu32(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comgt_epu32 - // CHECK: call <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i8 2) + // CHECK: icmp ugt <4 x i32> %{{.*}}, %{{.*}} + // CHECK: sext <4 x i1> %{{.*}} to <4 x i32> return _mm_comgt_epu32(a, b); } __m128i test_mm_comgt_epu64(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comgt_epu64 - // CHECK: call <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i8 2) + // CHECK: icmp ugt <2 x i64> %{{.*}}, %{{.*}} + // CHECK: sext <2 x i1> %{{.*}} to <2 x i64> return _mm_comgt_epu64(a, b); } __m128i test_mm_comgt_epi8(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comgt_epi8 - // CHECK: call <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i8 2) + // CHECK: icmp sgt <16 x i8> %{{.*}}, %{{.*}} + // CHECK: sext <16 x i1> %{{.*}} to <16 x i8> 
return _mm_comgt_epi8(a, b); } __m128i test_mm_comgt_epi16(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comgt_epi16 - // CHECK: call <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i8 2) + // CHECK: icmp sgt <8 x i16> %{{.*}}, %{{.*}} + // CHECK: sext <8 x i1> %{{.*}} to <8 x i16> return _mm_comgt_epi16(a, b); } __m128i test_mm_comgt_epi32(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comgt_epi32 - // CHECK: call <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i8 2) + // CHECK: icmp sgt <4 x i32> %{{.*}}, %{{.*}} + // CHECK: sext <4 x i1> %{{.*}} to <4 x i32> return _mm_comgt_epi32(a, b); } __m128i test_mm_comgt_epi64(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comgt_epi64 - // CHECK: call <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i8 2) + // CHECK: icmp sgt <2 x i64> %{{.*}}, %{{.*}} + // CHECK: sext <2 x i1> %{{.*}} to <2 x i64> return _mm_comgt_epi64(a, b); } @@ -158,49 +182,57 @@ __m128i test_mm_comgt_epi64(__m128i a, __m128i b) { __m128i test_mm_comge_epu8(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comge_epu8 - // CHECK: call <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i8 3) + // CHECK: icmp uge <16 x i8> %{{.*}}, %{{.*}} + // CHECK: sext <16 x i1> %{{.*}} to <16 x i8> return _mm_comge_epu8(a, b); } __m128i test_mm_comge_epu16(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comge_epu16 - // CHECK: call <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i8 3) + // CHECK: icmp uge <8 x i16> %{{.*}}, %{{.*}} + // CHECK: sext <8 x i1> %{{.*}} to <8 x i16> return _mm_comge_epu16(a, b); } __m128i test_mm_comge_epu32(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comge_epu32 - // CHECK: call <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i8 3) + // CHECK: icmp uge <4 x i32> %{{.*}}, %{{.*}} + // CHECK: sext <4 x i1> %{{.*}} to <4 x i32> return _mm_comge_epu32(a, b); } __m128i test_mm_comge_epu64(__m128i a, __m128i b) { 
// CHECK-LABEL: test_mm_comge_epu64 - // CHECK: call <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i8 3) + // CHECK: icmp uge <2 x i64> %{{.*}}, %{{.*}} + // CHECK: sext <2 x i1> %{{.*}} to <2 x i64> return _mm_comge_epu64(a, b); } __m128i test_mm_comge_epi8(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comge_epi8 - // CHECK: call <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i8 3) + // CHECK: icmp sge <16 x i8> %{{.*}}, %{{.*}} + // CHECK: sext <16 x i1> %{{.*}} to <16 x i8> return _mm_comge_epi8(a, b); } __m128i test_mm_comge_epi16(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comge_epi16 - // CHECK: call <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i8 3) + // CHECK: icmp sge <8 x i16> %{{.*}}, %{{.*}} + // CHECK: sext <8 x i1> %{{.*}} to <8 x i16> return _mm_comge_epi16(a, b); } __m128i test_mm_comge_epi32(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comge_epi32 - // CHECK: call <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i8 3) + // CHECK: icmp sge <4 x i32> %{{.*}}, %{{.*}} + // CHECK: sext <4 x i1> %{{.*}} to <4 x i32> return _mm_comge_epi32(a, b); } __m128i test_mm_comge_epi64(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comge_epi64 - // CHECK: call <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i8 3) + // CHECK: icmp sge <2 x i64> %{{.*}}, %{{.*}} + // CHECK: sext <2 x i1> %{{.*}} to <2 x i64> return _mm_comge_epi64(a, b); } @@ -208,49 +240,57 @@ __m128i test_mm_comge_epi64(__m128i a, __m128i b) { __m128i test_mm_comeq_epu8(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comeq_epu8 - // CHECK: call <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i8 4) + // CHECK: icmp eq <16 x i8> %{{.*}}, %{{.*}} + // CHECK: sext <16 x i1> %{{.*}} to <16 x i8> return _mm_comeq_epu8(a, b); } __m128i test_mm_comeq_epu16(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comeq_epu16 - // CHECK: call <8 x i16> 
@llvm.x86.xop.vpcomuw(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i8 4) + // CHECK: icmp eq <8 x i16> %{{.*}}, %{{.*}} + // CHECK: sext <8 x i1> %{{.*}} to <8 x i16> return _mm_comeq_epu16(a, b); } __m128i test_mm_comeq_epu32(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comeq_epu32 - // CHECK: call <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i8 4) + // CHECK: icmp eq <4 x i32> %{{.*}}, %{{.*}} + // CHECK: sext <4 x i1> %{{.*}} to <4 x i32> return _mm_comeq_epu32(a, b); } __m128i test_mm_comeq_epu64(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comeq_epu64 - // CHECK: call <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i8 4) + // CHECK: icmp eq <2 x i64> %{{.*}}, %{{.*}} + // CHECK: sext <2 x i1> %{{.*}} to <2 x i64> return _mm_comeq_epu64(a, b); } __m128i test_mm_comeq_epi8(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comeq_epi8 - // CHECK: call <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i8 4) + // CHECK: icmp eq <16 x i8> %{{.*}}, %{{.*}} + // CHECK: sext <16 x i1> %{{.*}} to <16 x i8> return _mm_comeq_epi8(a, b); } __m128i test_mm_comeq_epi16(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comeq_epi16 - // CHECK: call <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i8 4) + // CHECK: icmp eq <8 x i16> %{{.*}}, %{{.*}} + // CHECK: sext <8 x i1> %{{.*}} to <8 x i16> return _mm_comeq_epi16(a, b); } __m128i test_mm_comeq_epi32(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comeq_epi32 - // CHECK: call <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i8 4) + // CHECK: icmp eq <4 x i32> %{{.*}}, %{{.*}} + // CHECK: sext <4 x i1> %{{.*}} to <4 x i32> return _mm_comeq_epi32(a, b); } __m128i test_mm_comeq_epi64(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comeq_epi64 - // CHECK: call <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i8 4) + // CHECK: icmp eq <2 x i64> %{{.*}}, %{{.*}} + // CHECK: sext <2 x i1> %{{.*}} to <2 x i64> 
return _mm_comeq_epi64(a, b); } @@ -258,49 +298,57 @@ __m128i test_mm_comeq_epi64(__m128i a, __m128i b) { __m128i test_mm_comneq_epu8(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comneq_epu8 - // CHECK: call <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i8 5) + // CHECK: icmp ne <16 x i8> %{{.*}}, %{{.*}} + // CHECK: sext <16 x i1> %{{.*}} to <16 x i8> return _mm_comneq_epu8(a, b); } __m128i test_mm_comneq_epu16(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comneq_epu16 - // CHECK: call <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i8 5) + // CHECK: icmp ne <8 x i16> %{{.*}}, %{{.*}} + // CHECK: sext <8 x i1> %{{.*}} to <8 x i16> return _mm_comneq_epu16(a, b); } __m128i test_mm_comneq_epu32(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comneq_epu32 - // CHECK: call <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i8 5) + // CHECK: icmp ne <4 x i32> %{{.*}}, %{{.*}} + // CHECK: sext <4 x i1> %{{.*}} to <4 x i32> return _mm_comneq_epu32(a, b); } __m128i test_mm_comneq_epu64(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comneq_epu64 - // CHECK: call <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i8 5) + // CHECK: icmp ne <2 x i64> %{{.*}}, %{{.*}} + // CHECK: sext <2 x i1> %{{.*}} to <2 x i64> return _mm_comneq_epu64(a, b); } __m128i test_mm_comneq_epi8(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comneq_epi8 - // CHECK: call <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i8 5) + // CHECK: icmp ne <16 x i8> %{{.*}}, %{{.*}} + // CHECK: sext <16 x i1> %{{.*}} to <16 x i8> return _mm_comneq_epi8(a, b); } __m128i test_mm_comneq_epi16(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comneq_epi16 - // CHECK: call <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i8 5) + // CHECK: icmp ne <8 x i16> %{{.*}}, %{{.*}} + // CHECK: sext <8 x i1> %{{.*}} to <8 x i16> return _mm_comneq_epi16(a, b); } __m128i test_mm_comneq_epi32(__m128i a, 
__m128i b) { // CHECK-LABEL: test_mm_comneq_epi32 - // CHECK: call <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i8 5) + // CHECK: icmp ne <4 x i32> %{{.*}}, %{{.*}} + // CHECK: sext <4 x i1> %{{.*}} to <4 x i32> return _mm_comneq_epi32(a, b); } __m128i test_mm_comneq_epi64(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comneq_epi64 - // CHECK: call <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i8 5) + // CHECK: icmp ne <2 x i64> %{{.*}}, %{{.*}} + // CHECK: sext <2 x i1> %{{.*}} to <2 x i64> return _mm_comneq_epi64(a, b); } @@ -308,49 +356,49 @@ __m128i test_mm_comneq_epi64(__m128i a, __m128i b) { __m128i test_mm_comfalse_epu8(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comfalse_epu8 - // CHECK: call <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i8 6) + // CHECK: ret <2 x i64> zeroinitializer return _mm_comfalse_epu8(a, b); } __m128i test_mm_comfalse_epu16(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comfalse_epu16 - // CHECK: call <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i8 6) + // CHECK: ret <2 x i64> zeroinitializer return _mm_comfalse_epu16(a, b); } __m128i test_mm_comfalse_epu32(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comfalse_epu32 - // CHECK: call <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i8 6) + // CHECK: ret <2 x i64> zeroinitializer return _mm_comfalse_epu32(a, b); } __m128i test_mm_comfalse_epu64(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comfalse_epu64 - // CHECK: call <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i8 6) + // CHECK: ret <2 x i64> zeroinitializer return _mm_comfalse_epu64(a, b); } __m128i test_mm_comfalse_epi8(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comfalse_epi8 - // CHECK: call <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i8 6) + // CHECK: ret <2 x i64> zeroinitializer return _mm_comfalse_epi8(a, b); } __m128i 
test_mm_comfalse_epi16(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comfalse_epi16 - // CHECK: call <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i8 6) + // CHECK: ret <2 x i64> zeroinitializer return _mm_comfalse_epi16(a, b); } __m128i test_mm_comfalse_epi32(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comfalse_epi32 - // CHECK: call <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i8 6) + // CHECK: ret <2 x i64> zeroinitializer return _mm_comfalse_epi32(a, b); } __m128i test_mm_comfalse_epi64(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comfalse_epi64 - // CHECK: call <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i8 6) + // CHECK: ret <2 x i64> zeroinitializer return _mm_comfalse_epi64(a, b); } @@ -358,48 +406,48 @@ __m128i test_mm_comfalse_epi64(__m128i a, __m128i b) { __m128i test_mm_comtrue_epu8(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comtrue_epu8 - // CHECK: call <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i8 7) + // CHECK: ret <2 x i64> <i64 -1, i64 -1> return _mm_comtrue_epu8(a, b); } __m128i test_mm_comtrue_epu16(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comtrue_epu16 - // CHECK: call <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i8 7) + // CHECK: ret <2 x i64> <i64 -1, i64 -1> return _mm_comtrue_epu16(a, b); } __m128i test_mm_comtrue_epu32(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comtrue_epu32 - // CHECK: call <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i8 7) + // CHECK: ret <2 x i64> <i64 -1, i64 -1> return _mm_comtrue_epu32(a, b); } __m128i test_mm_comtrue_epu64(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comtrue_epu64 - // CHECK: call <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i8 7) + // CHECK: ret <2 x i64> <i64 -1, i64 -1> return _mm_comtrue_epu64(a, b); } __m128i test_mm_comtrue_epi8(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comtrue_epi8 - // CHECK: call <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8> %{{.*}}, <16 x
i8> %{{.*}}, i8 7) + // CHECK: ret <2 x i64> <i64 -1, i64 -1> return _mm_comtrue_epi8(a, b); } __m128i test_mm_comtrue_epi16(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comtrue_epi16 - // CHECK: call <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i8 7) + // CHECK: ret <2 x i64> <i64 -1, i64 -1> return _mm_comtrue_epi16(a, b); } __m128i test_mm_comtrue_epi32(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comtrue_epi32 - // CHECK: call <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i8 7) + // CHECK: ret <2 x i64> <i64 -1, i64 -1> return _mm_comtrue_epi32(a, b); } __m128i test_mm_comtrue_epi64(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comtrue_epi64 - // CHECK: call <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i8 7) + // CHECK: ret <2 x i64> <i64 -1, i64 -1> return _mm_comtrue_epi64(a, b); } diff --git a/test/CodeGen/xop-builtins.c b/test/CodeGen/xop-builtins.c index e6a09007f7..01d77ce056 100644 --- a/test/CodeGen/xop-builtins.c +++ b/test/CodeGen/xop-builtins.c @@ -290,49 +290,57 @@ __m128i test_mm_sha_epi64(__m128i a, __m128i b) { __m128i test_mm_com_epu8(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_com_epu8 - // CHECK: call <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i8 0) + // CHECK: icmp ult <16 x i8> %{{.*}}, %{{.*}} + // CHECK: sext <16 x i1> %{{.*}} to <16 x i8> return _mm_com_epu8(a, b, 0); } __m128i test_mm_com_epu16(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_com_epu16 - // CHECK: call <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i8 0) + // CHECK: icmp ult <8 x i16> %{{.*}}, %{{.*}} + // CHECK: sext <8 x i1> %{{.*}} to <8 x i16> return _mm_com_epu16(a, b, 0); } __m128i test_mm_com_epu32(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_com_epu32 - // CHECK: call <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i8 0) + // CHECK: icmp ult <4 x i32> %{{.*}}, %{{.*}} + // CHECK: sext <4 x i1> %{{.*}} to <4 x i32> return _mm_com_epu32(a, b, 0); } __m128i
test_mm_com_epu64(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_com_epu64 - // CHECK: call <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i8 0) + // CHECK: icmp ult <2 x i64> %{{.*}}, %{{.*}} + // CHECK: sext <2 x i1> %{{.*}} to <2 x i64> return _mm_com_epu64(a, b, 0); } __m128i test_mm_com_epi8(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_com_epi8 - // CHECK: call <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i8 0) + // CHECK: icmp slt <16 x i8> %{{.*}}, %{{.*}} + // CHECK: sext <16 x i1> %{{.*}} to <16 x i8> return _mm_com_epi8(a, b, 0); } __m128i test_mm_com_epi16(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_com_epi16 - // CHECK: call <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i8 0) + // CHECK: icmp slt <8 x i16> %{{.*}}, %{{.*}} + // CHECK: sext <8 x i1> %{{.*}} to <8 x i16> return _mm_com_epi16(a, b, 0); } __m128i test_mm_com_epi32(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_com_epi32 - // CHECK: call <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i8 0) + // CHECK: icmp slt <4 x i32> %{{.*}}, %{{.*}} + // CHECK: sext <4 x i1> %{{.*}} to <4 x i32> return _mm_com_epi32(a, b, 0); } __m128i test_mm_com_epi64(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_com_epi64 - // CHECK: call <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i8 0) + // CHECK: icmp slt <2 x i64> %{{.*}}, %{{.*}} + // CHECK: sext <2 x i1> %{{.*}} to <2 x i64> return _mm_com_epi64(a, b, 0); }