From 55baf1ea948aeb1a7c55bddb090e9e1eea7c06f2 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Mon, 6 Jun 2016 06:13:01 +0000
Subject: [PATCH] [AVX512] Convert masked palignr builtins directly to native
 IR similar to the other palignr builtins, but with a select to handle
 masking.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@271873 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/CGBuiltin.cpp          | 28 +++++++++++++++++++++++-----
 test/CodeGen/avx512bw-builtins.c   |  8 +++++---
 test/CodeGen/avx512vlbw-builtins.c | 12 ++++++++----
 3 files changed, 36 insertions(+), 12 deletions(-)

diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp
index 914c1c1cd2..b79a9a4a21 100644
--- a/lib/CodeGen/CGBuiltin.cpp
+++ b/lib/CodeGen/CGBuiltin.cpp
@@ -6653,7 +6653,10 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
   }
   case X86::BI__builtin_ia32_palignr128:
-  case X86::BI__builtin_ia32_palignr256: {
+  case X86::BI__builtin_ia32_palignr256:
+  case X86::BI__builtin_ia32_palignr128_mask:
+  case X86::BI__builtin_ia32_palignr256_mask:
+  case X86::BI__builtin_ia32_palignr512_mask: {
     unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
 
     unsigned NumElts =
@@ -6673,7 +6676,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
       Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
     }
 
-    int Indices[32];
+    int Indices[64];
     // 256-bit palignr operates on 128-bit lanes so we need to handle that
     for (unsigned l = 0; l != NumElts; l += 16) {
       for (unsigned i = 0; i != 16; ++i) {
@@ -6684,10 +6687,25 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
       }
     }
 
-    return Builder.CreateShuffleVector(Ops[1], Ops[0],
-                                       makeArrayRef(Indices, NumElts),
-                                       "palignr");
+    Value *Align = Builder.CreateShuffleVector(Ops[1], Ops[0],
+                                               makeArrayRef(Indices, NumElts),
+                                               "palignr");
+
+    // If this isn't a masked builtin, just return the align operation.
+    if (Ops.size() == 3)
+      return Align;
+
+    // If the mask is all ones just emit the align operation.
+    if (const auto *C = dyn_cast<Constant>(Ops[4]))
+      if (C->isAllOnesValue())
+        return Align;
+
+    llvm::VectorType *MaskTy = llvm::VectorType::get(Builder.getInt1Ty(),
+                                                     NumElts);
+    llvm::Value *Mask = Builder.CreateBitCast(Ops[4], MaskTy, "cast");
+    return Builder.CreateSelect(Mask, Align, Ops[3]);
   }
+
   case X86::BI__builtin_ia32_pslldqi256: {
     // Shift value is in bits so divide by 8.
     unsigned shiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() >> 3;
diff --git a/test/CodeGen/avx512bw-builtins.c b/test/CodeGen/avx512bw-builtins.c
index 254304b3e0..7fe5b43917 100644
--- a/test/CodeGen/avx512bw-builtins.c
+++ b/test/CodeGen/avx512bw-builtins.c
@@ -1489,19 +1489,21 @@ __m512i test_mm512_mask_permutexvar_epi16(__m512i __W, __mmask32 __M, __m512i __
 }
 __m512i test_mm512_alignr_epi8(__m512i __A,__m512i __B){
   // CHECK-LABEL: @test_mm512_alignr_epi8
-  // CHECK: @llvm.x86.avx512.mask.palignr.512
+  // CHECK: shufflevector <64 x i8> %{{.*}}, <64 x i8> %{{.*}}, <64 x i32>
   return _mm512_alignr_epi8(__A, __B, 2);
 }
 __m512i test_mm512_mask_alignr_epi8(__m512i __W, __mmask64 __U, __m512i __A,__m512i __B){
   // CHECK-LABEL: @test_mm512_mask_alignr_epi8
-  // CHECK: @llvm.x86.avx512.mask.palignr.512
+  // CHECK: shufflevector <64 x i8> %{{.*}}, <64 x i8> %{{.*}}, <64 x i32>
+  // CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
   return _mm512_mask_alignr_epi8(__W, __U, __A, __B, 2);
 }
 __m512i test_mm512_maskz_alignr_epi8(__mmask64 __U, __m512i __A,__m512i __B){
   // CHECK-LABEL: @test_mm512_maskz_alignr_epi8
-  // CHECK: @llvm.x86.avx512.mask.palignr.512
+  // CHECK: shufflevector <64 x i8> %{{.*}}, <64 x i8> %{{.*}}, <64 x i32>
+  // CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
   return _mm512_maskz_alignr_epi8(__U, __A, __B, 2);
 }
diff --git a/test/CodeGen/avx512vlbw-builtins.c b/test/CodeGen/avx512vlbw-builtins.c
index d5e357670e..7e7ac17dcd 100644
--- a/test/CodeGen/avx512vlbw-builtins.c
+++ b/test/CodeGen/avx512vlbw-builtins.c
@@ -2318,25 +2318,29 @@ __m256i test_mm256_mask_permutexvar_epi16(__m256i __W, __mmask16 __M, __m256i __
 }
 __m128i test_mm_mask_alignr_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: @test_mm_mask_alignr_epi8
-  // CHECK: @llvm.x86.avx512.mask.palignr.128
+  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32>
+  // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
   return _mm_mask_alignr_epi8(__W, __U, __A, __B, 2);
 }
 __m128i test_mm_maskz_alignr_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: @test_mm_maskz_alignr_epi8
-  // CHECK: @llvm.x86.avx512.mask.palignr.128
+  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32>
+  // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
   return _mm_maskz_alignr_epi8(__U, __A, __B, 2);
 }
 __m256i test_mm256_mask_alignr_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: @test_mm256_mask_alignr_epi8
-  // CHECK: @llvm.x86.avx512.mask.palignr.256
+  // CHECK: shufflevector <32 x i8> %{{.*}}, <32 x i8> %{{.*}}, <32 x i32>
+  // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
   return _mm256_mask_alignr_epi8(__W, __U, __A, __B, 2);
 }
 __m256i test_mm256_maskz_alignr_epi8(__mmask32 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: @test_mm256_maskz_alignr_epi8
-  // CHECK: @llvm.x86.avx512.mask.palignr.256
+  // CHECK: shufflevector <32 x i8> %{{.*}}, <32 x i8> %{{.*}}, <32 x i32>
+  // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
   return _mm256_maskz_alignr_epi8(__U, __A, __B, 2);
 }
-- 
2.40.0
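For reference, a minimal sketch (not part of the patch) of what the new lowering looks
like from the C side, following the pattern the updated CHECK lines test. The function
name and the shift count below are illustrative only; building it assumes a compiler
with AVX512BW/AVX512VL enabled (e.g. -mavx512bw -mavx512vl).

    #include <immintrin.h>

    // With this change, clang emits a plain shufflevector for the byte alignment
    // plus a vector select against the pass-through operand, instead of calling
    // the @llvm.x86.avx512.mask.palignr.* intrinsics.
    __m128i sketch_masked_alignr(__m128i w, __mmask16 u, __m128i a, __m128i b) {
      // Roughly the IR shape produced (cf. the CHECK lines above):
      //   %palignr = shufflevector <16 x i8> %b, <16 x i8> %a, <16 x i32> <...>
      //   %mask    = bitcast i16 %u to <16 x i1>
      //   %res     = select <16 x i1> %mask, <16 x i8> %palignr, <16 x i8> %w
      return _mm_mask_alignr_epi8(w, u, a, b, 2);
    }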