From: Craig Topper Date: Sat, 16 Dec 2017 08:26:22 +0000 (+0000) Subject: [X86] Implement kand/kandn/kor/kxor/kxnor/knot intrinsics using native IR. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=d073eeed8abada8922c0566577e94c80a064c48b;p=clang [X86] Implement kand/kandn/kor/kxor/kxnor/knot intrinsics using native IR. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@320919 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp index a1a2cef822..3ecd1c6697 100644 --- a/lib/CodeGen/CGBuiltin.cpp +++ b/lib/CodeGen/CGBuiltin.cpp @@ -7564,6 +7564,19 @@ static Value *EmitX86MaskedLoad(CodeGenFunction &CGF, return CGF.Builder.CreateMaskedLoad(Ops[0], Align, MaskVec, Ops[1]); } +static Value *EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc, + unsigned NumElts, SmallVectorImpl<Value *> &Ops, + bool InvertLHS = false) { + Value *LHS = getMaskVecValue(CGF, Ops[0], NumElts); + Value *RHS = getMaskVecValue(CGF, Ops[1], NumElts); + + if (InvertLHS) + LHS = CGF.Builder.CreateNot(LHS); + + return CGF.Builder.CreateBitCast(CGF.Builder.CreateBinOp(Opc, LHS, RHS), + CGF.Builder.getIntNTy(std::max(NumElts, 8U))); +} + static Value *EmitX86SubVectorBroadcast(CodeGenFunction &CGF, SmallVectorImpl<Value *> &Ops, llvm::Type *DstTy, @@ -8217,6 +8230,22 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, return EmitX86MaskedCompare(*this, CC, false, Ops); } + case X86::BI__builtin_ia32_kandhi: + return EmitX86MaskLogic(*this, Instruction::And, 16, Ops); + case X86::BI__builtin_ia32_kandnhi: + return EmitX86MaskLogic(*this, Instruction::And, 16, Ops, true); + case X86::BI__builtin_ia32_korhi: + return EmitX86MaskLogic(*this, Instruction::Or, 16, Ops); + case X86::BI__builtin_ia32_kxnorhi: + return EmitX86MaskLogic(*this, Instruction::Xor, 16, Ops, true); + case X86::BI__builtin_ia32_kxorhi: + return EmitX86MaskLogic(*this, Instruction::Xor, 16, Ops); + case X86::BI__builtin_ia32_knothi: { + Ops[0] = 
getMaskVecValue(*this, Ops[0], 16); + return Builder.CreateBitCast(Builder.CreateNot(Ops[0]), + Builder.getInt16Ty()); + } + case X86::BI__builtin_ia32_vplzcntd_128_mask: case X86::BI__builtin_ia32_vplzcntd_256_mask: case X86::BI__builtin_ia32_vplzcntd_512_mask: diff --git a/test/CodeGen/avx512f-builtins.c b/test/CodeGen/avx512f-builtins.c index a4e9e3bc9d..9a3bf5a244 100644 --- a/test/CodeGen/avx512f-builtins.c +++ b/test/CodeGen/avx512f-builtins.c @@ -385,7 +385,9 @@ __m512d test_mm512_set1_pd(double d) __mmask16 test_mm512_knot(__mmask16 a) { // CHECK-LABEL: @test_mm512_knot - // CHECK: @llvm.x86.avx512.knot.w + // CHECK: [[IN:%.*]] = bitcast i16 %1 to <16 x i1> + // CHECK: [[NOT:%.*]] = xor <16 x i1> [[IN]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true> + // CHECK: bitcast <16 x i1> [[NOT]] to i16 return _mm512_knot(a); } @@ -6211,22 +6213,38 @@ __m512i test_mm512_mask_permutexvar_epi32(__m512i __W, __mmask16 __M, __m512i __ return _mm512_mask_permutexvar_epi32(__W, __M, __X, __Y); } -__mmask16 test_mm512_kand(__mmask16 __A, __mmask16 __B) { +__mmask16 test_mm512_kand(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: @test_mm512_kand - // CHECK: @llvm.x86.avx512.kand.w - return _mm512_kand(__A, __B); + // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> + // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> + // CHECK: [[RES:%.*]] = and <16 x i1> [[LHS]], [[RHS]] + // CHECK: bitcast <16 x i1> [[RES]] to i16 + return _mm512_mask_cmpneq_epu32_mask(_mm512_kand(_mm512_cmpneq_epu32_mask(__A, __B), + _mm512_cmpneq_epu32_mask(__C, __D)), + __E, __F); } -__mmask16 test_mm512_kandn(__mmask16 __A, __mmask16 __B) { +__mmask16 test_mm512_kandn(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: @test_mm512_kandn - // CHECK: @llvm.x86.avx512.kandn.w - return _mm512_kandn(__A, __B); + // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> + // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> + // CHECK: [[NOT:%.*]] 
= xor <16 x i1> [[LHS]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true> + // CHECK: [[RES:%.*]] = and <16 x i1> [[NOT]], [[RHS]] + // CHECK: bitcast <16 x i1> [[RES]] to i16 + return _mm512_mask_cmpneq_epu32_mask(_mm512_kandn(_mm512_cmpneq_epu32_mask(__A, __B), + _mm512_cmpneq_epu32_mask(__C, __D)), + __E, __F); } -__mmask16 test_mm512_kor(__mmask16 __A, __mmask16 __B) { +__mmask16 test_mm512_kor(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: @test_mm512_kor - // CHECK: @llvm.x86.avx512.kor.w - return _mm512_kor(__A, __B); + // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> + // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> + // CHECK: [[RES:%.*]] = or <16 x i1> [[LHS]], [[RHS]] + // CHECK: bitcast <16 x i1> [[RES]] to i16 + return _mm512_mask_cmpneq_epu32_mask(_mm512_kor(_mm512_cmpneq_epu32_mask(__A, __B), + _mm512_cmpneq_epu32_mask(__C, __D)), + __E, __F); } int test_mm512_kortestc(__mmask16 __A, __mmask16 __B) { @@ -6254,16 +6272,27 @@ __mmask16 test_mm512_kunpackb(__m512i __A, __m512i __B, __m512i __C, __m512i __D __E, __F); } -__mmask16 test_mm512_kxnor(__mmask16 __A, __mmask16 __B) { +__mmask16 test_mm512_kxnor(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: @test_mm512_kxnor - // CHECK: @llvm.x86.avx512.kxnor.w - return _mm512_kxnor(__A, __B); + // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> + // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> + // CHECK: [[NOT:%.*]] = xor <16 x i1> [[LHS]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true> + // CHECK: [[RES:%.*]] = xor <16 x i1> [[NOT]], [[RHS]] + // CHECK: bitcast <16 x i1> [[RES]] to i16 + return _mm512_mask_cmpneq_epu32_mask(_mm512_kxnor(_mm512_cmpneq_epu32_mask(__A, __B), + _mm512_cmpneq_epu32_mask(__C, __D)), + __E, __F); } -__mmask16 test_mm512_kxor(__mmask16 __A, __mmask16 __B) { +__mmask16 test_mm512_kxor(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: @test_mm512_kxor - // CHECK: @llvm.x86.avx512.kxor.w - return 
_mm512_kxor(__A, __B); + // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> + // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> + // CHECK: [[RES:%.*]] = xor <16 x i1> [[LHS]], [[RHS]] + // CHECK: bitcast <16 x i1> [[RES]] to i16 + return _mm512_mask_cmpneq_epu32_mask(_mm512_kxor(_mm512_cmpneq_epu32_mask(__A, __B), + _mm512_cmpneq_epu32_mask(__C, __D)), + __E, __F); } void test_mm512_stream_si512(__m512i * __P, __m512i __A) {