From 5cb252b75b931724bb687e1419e4e57dbff11a12 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Wed, 15 Jun 2016 17:18:50 +0000 Subject: [PATCH] [x86] generate IR for SSE integer min/max builtins Sibling patch to r272806: http://reviews.llvm.org/rL272806 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@272807 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/CGBuiltin.cpp | 27 +++++++++++++++++++++++++++ test/CodeGen/sse2-builtins.c | 12 ++++++++---- test/CodeGen/sse41-builtins.c | 24 ++++++++++++++++-------- 3 files changed, 51 insertions(+), 12 deletions(-) diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp index 0ec2b140ff..c504e2e252 100644 --- a/lib/CodeGen/CGBuiltin.cpp +++ b/lib/CodeGen/CGBuiltin.cpp @@ -6788,6 +6788,33 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_pcmpgtq256_mask: case X86::BI__builtin_ia32_pcmpgtq512_mask: return EmitX86MaskedCompare(*this, ICmpInst::ICMP_SGT, Ops); + + // TODO: Handle 64/256/512-bit vector widths of min/max. 
+  case X86::BI__builtin_ia32_pmaxsb128: // Signed max -> generic icmp+select IR.
+  case X86::BI__builtin_ia32_pmaxsw128:
+  case X86::BI__builtin_ia32_pmaxsd128: {
+    Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_SGT, Ops[0], Ops[1]);
+    return Builder.CreateSelect(Cmp, Ops[0], Ops[1]); // Ops[0] where greater.
+  }
+  case X86::BI__builtin_ia32_pmaxub128: // Unsigned max: same shape, UGT.
+  case X86::BI__builtin_ia32_pmaxuw128:
+  case X86::BI__builtin_ia32_pmaxud128: {
+    Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_UGT, Ops[0], Ops[1]);
+    return Builder.CreateSelect(Cmp, Ops[0], Ops[1]); // Ops[0] where greater.
+  }
+  case X86::BI__builtin_ia32_pminsb128: // Signed min: same shape, SLT.
+  case X86::BI__builtin_ia32_pminsw128:
+  case X86::BI__builtin_ia32_pminsd128: {
+    Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_SLT, Ops[0], Ops[1]);
+    return Builder.CreateSelect(Cmp, Ops[0], Ops[1]); // Ops[0] where smaller.
+  }
+  case X86::BI__builtin_ia32_pminub128: // Unsigned min: same shape, ULT.
+  case X86::BI__builtin_ia32_pminuw128:
+  case X86::BI__builtin_ia32_pminud128: {
+    Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_ULT, Ops[0], Ops[1]);
+    return Builder.CreateSelect(Cmp, Ops[0], Ops[1]); // Ops[0] where smaller.
+  }
+
   // 3DNow!
case X86::BI__builtin_ia32_pswapdsf: case X86::BI__builtin_ia32_pswapdsi: { diff --git a/test/CodeGen/sse2-builtins.c b/test/CodeGen/sse2-builtins.c index f08feb0e3a..af4c4009c3 100644 --- a/test/CodeGen/sse2-builtins.c +++ b/test/CodeGen/sse2-builtins.c @@ -679,13 +679,15 @@ void test_mm_maskmoveu_si128(__m128i A, __m128i B, char* C) { __m128i test_mm_max_epi16(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_max_epi16 - // CHECK: call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) + // CHECK: [[CMP:%.*]] = icmp sgt <8 x i16> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]] return _mm_max_epi16(A, B); } __m128i test_mm_max_epu8(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_max_epu8 - // CHECK: call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) + // CHECK: [[CMP:%.*]] = icmp ugt <16 x i8> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]] return _mm_max_epu8(A, B); } @@ -709,13 +711,15 @@ void test_mm_mfence() { __m128i test_mm_min_epi16(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_min_epi16 - // CHECK: call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) + // CHECK: [[CMP:%.*]] = icmp slt <8 x i16> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]] return _mm_min_epi16(A, B); } __m128i test_mm_min_epu8(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_min_epu8 - // CHECK: call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) + // CHECK: [[CMP:%.*]] = icmp ult <16 x i8> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]] return _mm_min_epu8(A, B); } diff --git a/test/CodeGen/sse41-builtins.c b/test/CodeGen/sse41-builtins.c index 0335659502..6f984f8a78 100644 --- a/test/CodeGen/sse41-builtins.c +++ b/test/CodeGen/sse41-builtins.c @@ -245,49 +245,57 @@ __m128 test_mm_insert_ps(__m128 x, __m128 
y) { __m128i test_mm_max_epi8(__m128i x, __m128i y) { // CHECK-LABEL: test_mm_max_epi8 - // CHECK: call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) + // CHECK: [[CMP:%.*]] = icmp sgt <16 x i8> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]] return _mm_max_epi8(x, y); } __m128i test_mm_max_epi32(__m128i x, __m128i y) { // CHECK-LABEL: test_mm_max_epi32 - // CHECK: call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) + // CHECK: [[CMP:%.*]] = icmp sgt <4 x i32> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: select <4 x i1> [[CMP]], <4 x i32> [[X]], <4 x i32> [[Y]] return _mm_max_epi32(x, y); } __m128i test_mm_max_epu16(__m128i x, __m128i y) { // CHECK-LABEL: test_mm_max_epu16 - // CHECK: call <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) + // CHECK: [[CMP:%.*]] = icmp ugt <8 x i16> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]] return _mm_max_epu16(x, y); } __m128i test_mm_max_epu32(__m128i x, __m128i y) { // CHECK-LABEL: test_mm_max_epu32 - // CHECK: call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) + // CHECK: [[CMP:%.*]] = icmp ugt <4 x i32> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: select <4 x i1> [[CMP]], <4 x i32> [[X]], <4 x i32> [[Y]] return _mm_max_epu32(x, y); } __m128i test_mm_min_epi8(__m128i x, __m128i y) { // CHECK-LABEL: test_mm_min_epi8 - // CHECK: call <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) + // CHECK: [[CMP:%.*]] = icmp slt <16 x i8> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]] return _mm_min_epi8(x, y); } __m128i test_mm_min_epi32(__m128i x, __m128i y) { // CHECK-LABEL: test_mm_min_epi32 - // CHECK: call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) + // CHECK: [[CMP:%.*]] = icmp slt <4 x i32> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: select <4 x i1> [[CMP]], <4 
x i32> [[X]], <4 x i32> [[Y]] return _mm_min_epi32(x, y); } __m128i test_mm_min_epu16(__m128i x, __m128i y) { // CHECK-LABEL: test_mm_min_epu16 - // CHECK: call <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) + // CHECK: [[CMP:%.*]] = icmp ult <8 x i16> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]] return _mm_min_epu16(x, y); } __m128i test_mm_min_epu32(__m128i x, __m128i y) { // CHECK-LABEL: test_mm_min_epu32 - // CHECK: call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) + // CHECK: [[CMP:%.*]] = icmp ult <4 x i32> [[X:%.*]], [[Y:%.*]] + // CHECK-NEXT: select <4 x i1> [[CMP]], <4 x i32> [[X]], <4 x i32> [[Y]] return _mm_min_epu32(x, y); } -- 2.40.0