From: Craig Topper
Date: Wed, 6 Jul 2016 06:27:31 +0000 (+0000)
Subject: [X86] Use native IR for immediate values 0-7 of packed fp cmp builtins. This makes...
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=259aee973f1df2cba62cc3d32847dced739e9202;p=clang

[X86] Use native IR for immediate values 0-7 of packed fp cmp builtins. This
makes them match what is done when using the SSE builtins for these same
encodings.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@274608 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp
index dc0b164e6a..df5ca3a116 100644
--- a/lib/CodeGen/CGBuiltin.cpp
+++ b/lib/CodeGen/CGBuiltin.cpp
@@ -7289,6 +7289,51 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
     return getVectorFCmpIR(CmpInst::FCMP_UGT, V2F64);
   case X86::BI__builtin_ia32_cmpordpd:
     return getVectorFCmpIR(CmpInst::FCMP_ORD, V2F64);
+  case X86::BI__builtin_ia32_cmpps:
+  case X86::BI__builtin_ia32_cmpps256:
+  case X86::BI__builtin_ia32_cmppd:
+  case X86::BI__builtin_ia32_cmppd256: {
+    unsigned CC = cast<ConstantInt>(Ops[2])->getZExtValue();
+    // If this is one of the SSE immediates, we can use native IR.
+    if (CC < 8) {
+      FCmpInst::Predicate Pred;
+      switch (CC) {
+      case 0: Pred = FCmpInst::FCMP_OEQ; break;
+      case 1: Pred = FCmpInst::FCMP_OLT; break;
+      case 2: Pred = FCmpInst::FCMP_OLE; break;
+      case 3: Pred = FCmpInst::FCMP_UNO; break;
+      case 4: Pred = FCmpInst::FCMP_UNE; break;
+      case 5: Pred = FCmpInst::FCMP_UGE; break;
+      case 6: Pred = FCmpInst::FCMP_UGT; break;
+      case 7: Pred = FCmpInst::FCMP_ORD; break;
+      }
+      Value *Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
+      auto *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType());
+      auto *IntVecTy = llvm::VectorType::getInteger(FPVecTy);
+      Value *Sext = Builder.CreateSExt(Cmp, IntVecTy);
+      return Builder.CreateBitCast(Sext, FPVecTy);
+    }
+
+    // We can't handle 8-31 immediates with native IR, use the intrinsic.
+    Intrinsic::ID ID;
+    switch (BuiltinID) {
+    default: llvm_unreachable("Unsupported intrinsic!");
+    case X86::BI__builtin_ia32_cmpps:
+      ID = Intrinsic::x86_sse_cmp_ps;
+      break;
+    case X86::BI__builtin_ia32_cmpps256:
+      ID = Intrinsic::x86_avx_cmp_ps_256;
+      break;
+    case X86::BI__builtin_ia32_cmppd:
+      ID = Intrinsic::x86_sse2_cmp_pd;
+      break;
+    case X86::BI__builtin_ia32_cmppd256:
+      ID = Intrinsic::x86_avx_cmp_pd_256;
+      break;
+    }
+
+    return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
+  }
 
   // SSE scalar comparison intrinsics
   case X86::BI__builtin_ia32_cmpeqss:
diff --git a/test/CodeGen/avx2-builtins.c b/test/CodeGen/avx2-builtins.c
index b0deb47e6e..1e71245373 100644
--- a/test/CodeGen/avx2-builtins.c
+++ b/test/CodeGen/avx2-builtins.c
@@ -488,7 +488,9 @@ __m128d test_mm_mask_i32gather_pd(__m128d a, double const *b, __m128i c, __m128d
 
 __m256d test_mm256_i32gather_pd(double const *b, __m128i c) {
   // CHECK-LABEL: test_mm256_i32gather_pd
-  // CHECK: call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i8 0)
+  // CHECK: [[CMP:%.*]] = fcmp oeq <4 x double>
+  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64>
+  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i64> [[SEXT]] to <4 x double>
   // CHECK: call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> undef, i8* %{{.*}}, <4 x i32> %{{.*}}, <4 x double> %{{.*}}, i8 2)
   return _mm256_i32gather_pd(b, c, 2);
 }
@@ -516,7 +518,9 @@ __m128 test_mm_mask_i32gather_ps(__m128 a, float const *b, __m128i c, __m128 d)
 
 __m256 test_mm256_i32gather_ps(float const *b, __m256i c) {
   // CHECK-LABEL: test_mm256_i32gather_ps
-  // CHECK: call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i8 0)
+  // CHECK: [[CMP:%.*]] = fcmp oeq <8 x float>
+  // CHECK-NEXT: [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32>
+  // CHECK-NEXT: [[BC:%.*]] = bitcast <8 x i32> [[SEXT]] to <8 x float>
   // CHECK: call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> undef, i8* %{{.*}}, <8 x i32> %{{.*}}, <8 x float> %{{.*}}, i8 2)
   return _mm256_i32gather_ps(b, c, 2);
 }
@@ -592,7 +596,9 @@ __m128d test_mm_mask_i64gather_pd(__m128d a, double const *b, __m128i c, __m128d
 
 __m256d test_mm256_i64gather_pd(double const *b, __m256i c) {
   // CHECK-LABEL: test_mm256_i64gather_pd
-  // CHECK: call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i8 0)
+  // CHECK: [[CMP:%.*]] = fcmp oeq <4 x double>
+  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64>
+  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i64> [[SEXT]] to <4 x double>
   // CHECK: call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> undef, i8* %{{.*}}, <4 x i64> %{{.*}}, <4 x double> %{{.*}}, i8 2)
   return _mm256_i64gather_pd(b, c, 2);
 }
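
For illustration, a minimal sketch of the user-visible effect. This is a
hypothetical translation unit, not part of the commit; it assumes the standard
<immintrin.h> wrappers and _CMP_* encodings, compiled with something like
clang -mavx -S -emit-llvm (value names in the IR comments are illustrative):

// Hypothetical example: packed fp compares whose immediate is in the SSE
// range 0-7 now lower to native IR; immediates 8-31 still call the
// target-specific intrinsic.
#include <immintrin.h>

__m128 cmp_lt(__m128 a, __m128 b) {
  // Immediate 1 (_CMP_LT_OS) maps to FCMP_OLT above, so this now emits:
  //   %cmp  = fcmp olt <4 x float> %a, %b
  //   %sext = sext <4 x i1> %cmp to <4 x i32>
  //   %res  = bitcast <4 x i32> %sext to <4 x float>
  return _mm_cmp_ps(a, b, _CMP_LT_OS);
}

__m256 cmp_nge(__m256 a, __m256 b) {
  // Immediate 9 (_CMP_NGE_US) has no SSE encoding, so this still emits:
  //   call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a, <8 x float> %b, i8 9)
  return _mm256_cmp_ps(a, b, _CMP_NGE_US);
}

Expressing the 0-7 encodings as fcmp/sext/bitcast lets the middle-end reason
about the comparison, and matches the IR already emitted for the corresponding
SSE builtins such as __builtin_ia32_cmpltps.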