From: Craig Topper Date: Mon, 31 Oct 2016 05:55:57 +0000 (+0000) Subject: [AVX-512] Add missing patterns for selecting masked vector extracts that started... X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=ddcef0e5a4b9ad1492116ef40389ebc3718e391c;p=llvm [AVX-512] Add missing patterns for selecting masked vector extracts that started from shuffles. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@285546 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index e1d20f636f2..cef834ba506 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -590,8 +590,9 @@ def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst), //--- multiclass vextract_for_size { + X86VectorVTInfo From, X86VectorVTInfo To, + PatFrag vextract_extract, + SDNodeXForm EXTRACT_get_vextract_imm> { let hasSideEffects = 0, ExeDomain = To.ExeDomain in { // use AVX512_maskable_in_asm (AVX512_maskable can't be used due to @@ -622,6 +623,24 @@ multiclass vextract_for_size, EVEX_K, EVEX; } + def : Pat<(To.VT (vselect To.KRCWM:$mask, + (vextract_extract:$ext (From.VT From.RC:$src1), + (iPTR imm)), + To.RC:$src0)), + (!cast(NAME # To.EltSize # "x" # To.NumElts # + From.ZSuffix # "rrk") + To.RC:$src0, To.KRCWM:$mask, From.RC:$src1, + (EXTRACT_get_vextract_imm To.RC:$ext))>; + + def : Pat<(To.VT (vselect To.KRCWM:$mask, + (vextract_extract:$ext (From.VT From.RC:$src1), + (iPTR imm)), + To.ImmAllZerosV)), + (!cast(NAME # To.EltSize # "x" # To.NumElts # + From.ZSuffix # "rrkz") + To.KRCWM:$mask, From.RC:$src1, + (EXTRACT_get_vextract_imm To.RC:$ext))>; + // Intrinsic call with masking. 
def : Pat<(!cast("int_x86_avx512_mask_vextract" # To.EltTypeName # "x" # To.NumElts # "_" # From.Size) @@ -667,39 +686,45 @@ multiclass vextract_for_size_lowering { + ValueType EltVT64, int Opcode256> { defm NAME # "32x4Z" : vextract_for_size, X86VectorVTInfo< 4, EltVT32, VR128X>, - vextract128_extract>, + vextract128_extract, + EXTRACT_get_vextract128_imm>, EVEX_V512, EVEX_CD8<32, CD8VT4>; defm NAME # "64x4Z" : vextract_for_size, X86VectorVTInfo< 4, EltVT64, VR256X>, - vextract256_extract>, + vextract256_extract, + EXTRACT_get_vextract256_imm>, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>; let Predicates = [HasVLX] in defm NAME # "32x4Z256" : vextract_for_size, X86VectorVTInfo< 4, EltVT32, VR128X>, - vextract128_extract>, + vextract128_extract, + EXTRACT_get_vextract128_imm>, EVEX_V256, EVEX_CD8<32, CD8VT4>; let Predicates = [HasVLX, HasDQI] in defm NAME # "64x2Z256" : vextract_for_size, X86VectorVTInfo< 2, EltVT64, VR128X>, - vextract128_extract>, + vextract128_extract, + EXTRACT_get_vextract128_imm>, VEX_W, EVEX_V256, EVEX_CD8<64, CD8VT2>; let Predicates = [HasDQI] in { defm NAME # "64x2Z" : vextract_for_size, X86VectorVTInfo< 2, EltVT64, VR128X>, - vextract128_extract>, + vextract128_extract, + EXTRACT_get_vextract128_imm>, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>; defm NAME # "32x8Z" : vextract_for_size, X86VectorVTInfo< 8, EltVT32, VR256X>, - vextract256_extract>, + vextract256_extract, + EXTRACT_get_vextract256_imm>, EVEX_V512, EVEX_CD8<32, CD8VT8>; } } diff --git a/test/CodeGen/X86/avx512-extract-subvector.ll b/test/CodeGen/X86/avx512-extract-subvector.ll index de4b541c9db..21419bd0296 100644 --- a/test/CodeGen/X86/avx512-extract-subvector.ll +++ b/test/CodeGen/X86/avx512-extract-subvector.ll @@ -344,3 +344,232 @@ entry: store <32 x i8> %0, <32 x i8>* %1, align 1 ret void } + +define <4 x double> @test_mm512_mask_extractf64x4_pd(<4 x double> %__W, i8 %__U, <8 x double> %__A) { +; SKX-LABEL: test_mm512_mask_extractf64x4_pd: +; SKX: ## BB#0: ## %entry +; SKX-NEXT: 
kmovb %edi, %k1 +; SKX-NEXT: vextractf64x4 $1, %zmm1, %ymm0 {%k1} +; SKX-NEXT: retq +entry: + %shuffle = shufflevector <8 x double> %__A, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %0 = bitcast i8 %__U to <8 x i1> + %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %1 = select <4 x i1> %extract, <4 x double> %shuffle, <4 x double> %__W + ret <4 x double> %1 +} + +define <4 x double> @test_mm512_maskz_extractf64x4_pd(i8 %__U, <8 x double> %__A) { +; SKX-LABEL: test_mm512_maskz_extractf64x4_pd: +; SKX: ## BB#0: ## %entry +; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: vextractf64x4 $1, %zmm0, %ymm0 {%k1} {z} +; SKX-NEXT: retq +entry: + %shuffle = shufflevector <8 x double> %__A, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %0 = bitcast i8 %__U to <8 x i1> + %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %1 = select <4 x i1> %extract, <4 x double> %shuffle, <4 x double> zeroinitializer + ret <4 x double> %1 +} + +define <4 x float> @test_mm512_mask_extractf32x4_ps(<4 x float> %__W, i8 %__U, <8 x double> %__A) { +; SKX-LABEL: test_mm512_mask_extractf32x4_ps: +; SKX: ## BB#0: ## %entry +; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: vextractf32x4 $1, %zmm1, %xmm0 {%k1} +; SKX-NEXT: retq +entry: + %0 = bitcast <8 x double> %__A to <16 x float> + %shuffle = shufflevector <16 x float> %0, <16 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %1 = bitcast i8 %__U to <8 x i1> + %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %2 = select <4 x i1> %extract, <4 x float> %shuffle, <4 x float> %__W + ret <4 x float> %2 +} + +define <4 x float> @test_mm512_maskz_extractf32x4_ps(i8 %__U, <8 x double> %__A) { +; SKX-LABEL: test_mm512_maskz_extractf32x4_ps: +; SKX: ## BB#0: ## %entry +; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: vextractf32x4 $1, %zmm0, %xmm0 {%k1} {z} +; SKX-NEXT: retq +entry: + %0 = bitcast <8 x double> %__A to <16 x float> + %shuffle = shufflevector <16 x float> %0, <16 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %1 = bitcast i8 %__U to <8 x i1> + %extract = shufflevector <8 x i1> %1, <8 
x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %2 = select <4 x i1> %extract, <4 x float> %shuffle, <4 x float> zeroinitializer + ret <4 x float> %2 +} + +define <2 x double> @test_mm256_mask_extractf64x2_pd(<2 x double> %__W, i8 %__U, <4 x double> %__A) { +; SKX-LABEL: test_mm256_mask_extractf64x2_pd: +; SKX: ## BB#0: ## %entry +; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: vextractf64x2 $1, %ymm1, %xmm0 {%k1} +; SKX-NEXT: retq +entry: + %shuffle = shufflevector <4 x double> %__A, <4 x double> undef, <2 x i32> <i32 2, i32 3> + %0 = bitcast i8 %__U to <8 x i1> + %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> <i32 0, i32 1> + %1 = select <2 x i1> %extract, <2 x double> %shuffle, <2 x double> %__W + ret <2 x double> %1 +} + +define <2 x double> @test_mm256_maskz_extractf64x2_pd(i8 %__U, <4 x double> %__A) { +; SKX-LABEL: test_mm256_maskz_extractf64x2_pd: +; SKX: ## BB#0: ## %entry +; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: vextractf64x2 $1, %ymm0, %xmm0 {%k1} {z} +; SKX-NEXT: retq +entry: + %shuffle = shufflevector <4 x double> %__A, <4 x double> undef, <2 x i32> <i32 2, i32 3> + %0 = bitcast i8 %__U to <8 x i1> + %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> <i32 0, i32 1> + %1 = select <2 x i1> %extract, <2 x double> %shuffle, <2 x double> zeroinitializer + ret <2 x double> %1 +} + +define <2 x i64> @test_mm256_mask_extracti64x2_epi64(<2 x i64> %__W, i8 %__U, <4 x i64> %__A) { +; SKX-LABEL: test_mm256_mask_extracti64x2_epi64: +; SKX: ## BB#0: ## %entry +; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: vextracti64x2 $1, %ymm1, %xmm0 {%k1} +; SKX-NEXT: retq +entry: + %shuffle = shufflevector <4 x i64> %__A, <4 x i64> undef, <2 x i32> <i32 2, i32 3> + %0 = bitcast i8 %__U to <8 x i1> + %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> <i32 0, i32 1> + %1 = select <2 x i1> %extract, <2 x i64> %shuffle, <2 x i64> %__W + ret <2 x i64> %1 +} + +define <2 x i64> @test_mm256_maskz_extracti64x2_epi64(i8 %__U, <4 x i64> %__A) { +; SKX-LABEL: test_mm256_maskz_extracti64x2_epi64: +; SKX: ## BB#0: ## %entry +; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: 
vextracti64x2 $1, %ymm0, %xmm0 {%k1} {z} +; SKX-NEXT: retq +entry: + %shuffle = shufflevector <4 x i64> %__A, <4 x i64> undef, <2 x i32> <i32 2, i32 3> + %0 = bitcast i8 %__U to <8 x i1> + %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> <i32 0, i32 1> + %1 = select <2 x i1> %extract, <2 x i64> %shuffle, <2 x i64> zeroinitializer + ret <2 x i64> %1 +} + +define <4 x float> @test_mm256_mask_extractf32x4_ps(<4 x float> %__W, i8 %__U, <8 x float> %__A) { +; SKX-LABEL: test_mm256_mask_extractf32x4_ps: +; SKX: ## BB#0: ## %entry +; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: vextractf32x4 $1, %ymm1, %xmm0 {%k1} +; SKX-NEXT: retq +entry: + %shuffle = shufflevector <8 x float> %__A, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %0 = bitcast i8 %__U to <8 x i1> + %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %1 = select <4 x i1> %extract, <4 x float> %shuffle, <4 x float> %__W + ret <4 x float> %1 +} + +define <4 x float> @test_mm256_maskz_extractf32x4_ps(i8 %__U, <8 x float> %__A) { +; SKX-LABEL: test_mm256_maskz_extractf32x4_ps: +; SKX: ## BB#0: ## %entry +; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: vextractf32x4 $1, %ymm0, %xmm0 {%k1} {z} +; SKX-NEXT: retq +entry: + %shuffle = shufflevector <8 x float> %__A, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %0 = bitcast i8 %__U to <8 x i1> + %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %1 = select <4 x i1> %extract, <4 x float> %shuffle, <4 x float> zeroinitializer + ret <4 x float> %1 +} + +define <2 x i64> @test_mm256_mask_extracti32x4_epi32(<2 x i64> %__W, i8 %__U, <4 x i64> %__A) { +; SKX-LABEL: test_mm256_mask_extracti32x4_epi32: +; SKX: ## BB#0: ## %entry +; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: vextracti32x4 $1, %ymm1, %xmm0 {%k1} +; SKX-NEXT: retq +entry: + %0 = bitcast <4 x i64> %__A to <8 x i32> + %shuffle = shufflevector <8 x i32> %0, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %1 = bitcast <2 x i64> %__W to <4 x i32> + %2 = bitcast i8 %__U to <8 x i1> + %extract = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %3 = select <4 x 
i1> %extract, <4 x i32> %shuffle, <4 x i32> %1 + %4 = bitcast <4 x i32> %3 to <2 x i64> + ret <2 x i64> %4 +} + +define <2 x i64> @test_mm256_maskz_extracti32x4_epi32(i8 %__U, <4 x i64> %__A) { +; SKX-LABEL: test_mm256_maskz_extracti32x4_epi32: +; SKX: ## BB#0: ## %entry +; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: vextracti32x4 $1, %ymm0, %xmm0 {%k1} {z} +; SKX-NEXT: retq +entry: + %0 = bitcast <4 x i64> %__A to <8 x i32> + %shuffle = shufflevector <8 x i32> %0, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %1 = bitcast i8 %__U to <8 x i1> + %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %2 = select <4 x i1> %extract, <4 x i32> %shuffle, <4 x i32> zeroinitializer + %3 = bitcast <4 x i32> %2 to <2 x i64> + ret <2 x i64> %3 +} + +define <8 x float> @test_mm512_mask_extractf32x8_ps(<8 x float> %__W, i8 %__U, <16 x float> %__A) { +; SKX-LABEL: test_mm512_mask_extractf32x8_ps: +; SKX: ## BB#0: ## %entry +; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: vextractf32x8 $1, %zmm1, %ymm0 {%k1} +; SKX-NEXT: retq +entry: + %shuffle = shufflevector <16 x float> %__A, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %0 = bitcast i8 %__U to <8 x i1> + %1 = select <8 x i1> %0, <8 x float> %shuffle, <8 x float> %__W + ret <8 x float> %1 +} + +define <8 x float> @test_mm512_maskz_extractf32x8_ps(i8 %__U, <16 x float> %__A) { +; SKX-LABEL: test_mm512_maskz_extractf32x8_ps: +; SKX: ## BB#0: ## %entry +; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: vextractf32x8 $1, %zmm0, %ymm0 {%k1} {z} +; SKX-NEXT: retq +entry: + %shuffle = shufflevector <16 x float> %__A, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %0 = bitcast i8 %__U to <8 x i1> + %1 = select <8 x i1> %0, <8 x float> %shuffle, <8 x float> zeroinitializer + ret <8 x float> %1 +} + +define <2 x double> @test_mm512_mask_extractf64x2_pd(<2 x double> %__W, i8 %__U, <8 x double> %__A) { +; SKX-LABEL: test_mm512_mask_extractf64x2_pd: +; SKX: ## BB#0: ## %entry +; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: vextractf64x2 $3, %zmm1, %xmm0 {%k1} +; SKX-NEXT: retq +entry: + %shuffle = 
shufflevector <8 x double> %__A, <8 x double> undef, <2 x i32> <i32 6, i32 7> + %0 = bitcast i8 %__U to <8 x i1> + %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> <i32 0, i32 1> + %1 = select <2 x i1> %extract, <2 x double> %shuffle, <2 x double> %__W + ret <2 x double> %1 +} + +define <2 x double> @test_mm512_maskz_extractf64x2_pd(i8 %__U, <8 x double> %__A) { +; SKX-LABEL: test_mm512_maskz_extractf64x2_pd: +; SKX: ## BB#0: ## %entry +; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: vextractf64x2 $3, %zmm0, %xmm0 {%k1} {z} +; SKX-NEXT: retq +entry: + %shuffle = shufflevector <8 x double> %__A, <8 x double> undef, <2 x i32> <i32 6, i32 7> + %0 = bitcast i8 %__U to <8 x i1> + %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> <i32 0, i32 1> + %1 = select <2 x i1> %extract, <2 x double> %shuffle, <2 x double> zeroinitializer + ret <2 x double> %1 +}