From: Uriel Korach
Date: Wed, 11 Oct 2017 08:39:25 +0000 (+0000)
Subject: [X86] Added tests for TESTM and TESTNM (NFC)
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=96eb407340c249e7661c66081b0524cb79fc9cc6;p=llvm

[X86] Added tests for TESTM and TESTNM (NFC)

Adding these test files now, so that a later commit adding a new pattern
for the TESTM and TESTNM instructions can show the improvements it brings.

Change-Id: If3908b7f91897d764053312365a2bc1de78b291d

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@315443 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/test/CodeGen/X86/avx512bw-vec-test-testn.ll b/test/CodeGen/X86/avx512bw-vec-test-testn.ll
new file mode 100644
index 00000000000..6dd6440faa1
--- /dev/null
+++ b/test/CodeGen/X86/avx512bw-vec-test-testn.ll
@@ -0,0 +1,161 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s
+
+; Function Attrs: norecurse nounwind readnone
+define zeroext i32 @TEST_mm512_test_epi16_mask(<8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 {
+; CHECK-LABEL: TEST_mm512_test_epi16_mask:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: vpandq %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpneqw %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %and.i.i = and <8 x i64> %__B, %__A
+ %0 = bitcast <8 x i64> %and.i.i to <32 x i16>
+ %1 = icmp ne <32 x i16> %0, zeroinitializer
+ %2 = bitcast <32 x i1> %1 to i32
+ ret i32 %2
+}
+
+
+; Function Attrs: norecurse nounwind readnone
+define zeroext i64 @TEST_mm512_test_epi8_mask(<8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 {
+; CHECK-LABEL: TEST_mm512_test_epi8_mask:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: vpandq %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpneqb %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %and.i.i = and <8 x i64> %__B, %__A
+ %0 = bitcast <8 x i64> %and.i.i to <64 x i8>
+ %1 = icmp ne <64 x i8> %0, zeroinitializer
+ %2 = bitcast <64 x i1> %1 to i64
+ ret i64 %2
+}
+
+; Function Attrs: norecurse nounwind readnone
+define zeroext i32 @TEST_mm512_mask_test_epi16_mask(i32 %__U, <8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 {
+; CHECK-LABEL: TEST_mm512_mask_test_epi16_mask:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: vpandq %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %and.i.i = and <8 x i64> %__B, %__A
+ %0 = bitcast <8 x i64> %and.i.i to <32 x i16>
+ %1 = icmp ne <32 x i16> %0, zeroinitializer
+ %2 = bitcast i32 %__U to <32 x i1>
+ %3 = and <32 x i1> %1, %2
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+; Function Attrs: norecurse nounwind readnone
+define zeroext i64 @TEST_mm512_mask_test_epi8_mask(i64 %__U, <8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 {
+; CHECK-LABEL: TEST_mm512_mask_test_epi8_mask:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: vpandq %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: kmovq %rdi, %k1
+; CHECK-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %and.i.i = and <8 x i64> %__B, %__A
+ %0 = bitcast <8 x i64> %and.i.i to <64 x i8>
+ %1 = icmp 
ne <64 x i8> %0, zeroinitializer + %2 = bitcast i64 %__U to <64 x i1> + %3 = and <64 x i1> %1, %2 + %4 = bitcast <64 x i1> %3 to i64 + ret i64 %4 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i32 @TEST_mm512_testn_epi16_mask(<8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 { +; CHECK-LABEL: TEST_mm512_testn_epi16_mask: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: vpandq %zmm0, %zmm1, %zmm0 +; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 +; CHECK-NEXT: kmovd %k0, %eax +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %and.i.i = and <8 x i64> %__B, %__A + %0 = bitcast <8 x i64> %and.i.i to <32 x i16> + %1 = icmp eq <32 x i16> %0, zeroinitializer + %2 = bitcast <32 x i1> %1 to i32 + ret i32 %2 +} + + +; Function Attrs: norecurse nounwind readnone +define zeroext i64 @TEST_mm512_testn_epi8_mask(<8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 { +; CHECK-LABEL: TEST_mm512_testn_epi8_mask: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: vpandq %zmm0, %zmm1, %zmm0 +; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 +; CHECK-NEXT: kmovq %k0, %rax +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %and.i.i = and <8 x i64> %__B, %__A + %0 = bitcast <8 x i64> %and.i.i to <64 x i8> + %1 = icmp eq <64 x i8> %0, zeroinitializer + %2 = bitcast <64 x i1> %1 to i64 + ret i64 %2 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i32 @TEST_mm512_mask_testn_epi16_mask(i32 %__U, <8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 { +; CHECK-LABEL: TEST_mm512_mask_testn_epi16_mask: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: vpandq %zmm0, %zmm1, %zmm0 +; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1} +; CHECK-NEXT: kmovd %k0, %eax +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %and.i.i = and <8 x i64> %__B, %__A + %0 = bitcast <8 x i64> %and.i.i to <32 x i16> + %1 = icmp eq <32 x i16> %0, zeroinitializer + %2 = bitcast i32 %__U to <32 x i1> + %3 = and <32 x i1> %1, %2 + %4 = bitcast <32 x i1> %3 to i32 + ret i32 %4 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i64 @TEST_mm512_mask_testn_epi8_mask(i64 %__U, <8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 { +; CHECK-LABEL: TEST_mm512_mask_testn_epi8_mask: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: vpandq %zmm0, %zmm1, %zmm0 +; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: kmovq %rdi, %k1 +; CHECK-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} +; CHECK-NEXT: kmovq %k0, %rax +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %and.i.i = and <8 x i64> %__B, %__A + %0 = bitcast <8 x i64> %and.i.i to <64 x i8> + %1 = icmp eq <64 x i8> %0, zeroinitializer + %2 = bitcast i64 %__U to <64 x i1> + %3 = and <64 x i1> %1, %2 + %4 = bitcast <64 x i1> %3 to i64 + ret i64 %4 +} + diff --git a/test/CodeGen/X86/avx512bwvl-vec-test-testn.ll b/test/CodeGen/X86/avx512bwvl-vec-test-testn.ll new file mode 100644 index 00000000000..f67ceb2fe04 --- /dev/null +++ b/test/CodeGen/X86/avx512bwvl-vec-test-testn.ll @@ -0,0 +1,320 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl | FileCheck %s + +; Function Attrs: norecurse nounwind readnone +define zeroext i16 @TEST_mm_test_epi8_mask(<2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 { +; CHECK-LABEL: TEST_mm_test_epi8_mask: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: vpand %xmm0, %xmm1, %xmm0 +; CHECK-NEXT: 
vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vpcmpneqb %xmm1, %xmm0, %k0 +; CHECK-NEXT: kmovd %k0, %eax +; CHECK-NEXT: # kill: %AX %AX %EAX +; CHECK-NEXT: retq +entry: + %and.i.i = and <2 x i64> %__B, %__A + %0 = bitcast <2 x i64> %and.i.i to <16 x i8> + %1 = icmp ne <16 x i8> %0, zeroinitializer + %2 = bitcast <16 x i1> %1 to i16 + ret i16 %2 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i16 @TEST_mm_mask_test_epi8_mask(i16 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 { +; CHECK-LABEL: TEST_mm_mask_test_epi8_mask: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: vpand %xmm0, %xmm1, %xmm0 +; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vpcmpneqb %xmm1, %xmm0, %k0 {%k1} +; CHECK-NEXT: kmovd %k0, %eax +; CHECK-NEXT: # kill: %AX %AX %EAX +; CHECK-NEXT: retq +entry: + %and.i.i = and <2 x i64> %__B, %__A + %0 = bitcast <2 x i64> %and.i.i to <16 x i8> + %1 = icmp ne <16 x i8> %0, zeroinitializer + %2 = bitcast i16 %__U to <16 x i1> + %3 = and <16 x i1> %1, %2 + %4 = bitcast <16 x i1> %3 to i16 + ret i16 %4 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @TEST_mm_test_epi16_mask(<2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 { +; CHECK-LABEL: TEST_mm_test_epi16_mask: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: vpand %xmm0, %xmm1, %xmm0 +; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vpcmpneqw %xmm1, %xmm0, %k0 +; CHECK-NEXT: kmovd %k0, %eax +; CHECK-NEXT: # kill: %AL %AL %EAX +; CHECK-NEXT: retq +entry: + %and.i.i = and <2 x i64> %__B, %__A + %0 = bitcast <2 x i64> %and.i.i to <8 x i16> + %1 = icmp ne <8 x i16> %0, zeroinitializer + %2 = bitcast <8 x i1> %1 to i8 + ret i8 %2 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @TEST_mm_mask_test_epi16_mask(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 { +; CHECK-LABEL: TEST_mm_mask_test_epi16_mask: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: vpand %xmm0, %xmm1, %xmm0 +; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vpcmpneqw %xmm1, %xmm0, %k0 {%k1} +; CHECK-NEXT: kmovd %k0, %eax +; CHECK-NEXT: # kill: %AL %AL %EAX +; CHECK-NEXT: retq +entry: + %and.i.i = and <2 x i64> %__B, %__A + %0 = bitcast <2 x i64> %and.i.i to <8 x i16> + %1 = icmp ne <8 x i16> %0, zeroinitializer + %2 = bitcast i8 %__U to <8 x i1> + %3 = and <8 x i1> %1, %2 + %4 = bitcast <8 x i1> %3 to i8 + ret i8 %4 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i16 @TEST_mm_testn_epi8_mask(<2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 { +; CHECK-LABEL: TEST_mm_testn_epi8_mask: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: vpand %xmm0, %xmm1, %xmm0 +; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 +; CHECK-NEXT: kmovd %k0, %eax +; CHECK-NEXT: # kill: %AX %AX %EAX +; CHECK-NEXT: retq +entry: + %and.i.i = and <2 x i64> %__B, %__A + %0 = bitcast <2 x i64> %and.i.i to <16 x i8> + %1 = icmp eq <16 x i8> %0, zeroinitializer + %2 = bitcast <16 x i1> %1 to i16 + ret i16 %2 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i16 @TEST_mm_mask_testn_epi8_mask(i16 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 { +; CHECK-LABEL: TEST_mm_mask_testn_epi8_mask: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: vpand %xmm0, %xmm1, %xmm0 +; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 {%k1} +; CHECK-NEXT: kmovd %k0, %eax +; CHECK-NEXT: # kill: %AX %AX %EAX +; CHECK-NEXT: 
retq +entry: + %and.i.i = and <2 x i64> %__B, %__A + %0 = bitcast <2 x i64> %and.i.i to <16 x i8> + %1 = icmp eq <16 x i8> %0, zeroinitializer + %2 = bitcast i16 %__U to <16 x i1> + %3 = and <16 x i1> %1, %2 + %4 = bitcast <16 x i1> %3 to i16 + ret i16 %4 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @TEST_mm_testn_epi16_mask(<2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 { +; CHECK-LABEL: TEST_mm_testn_epi16_mask: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: vpand %xmm0, %xmm1, %xmm0 +; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 +; CHECK-NEXT: kmovd %k0, %eax +; CHECK-NEXT: # kill: %AL %AL %EAX +; CHECK-NEXT: retq +entry: + %and.i.i = and <2 x i64> %__B, %__A + %0 = bitcast <2 x i64> %and.i.i to <8 x i16> + %1 = icmp eq <8 x i16> %0, zeroinitializer + %2 = bitcast <8 x i1> %1 to i8 + ret i8 %2 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @TEST_mm_mask_testn_epi16_mask(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 { +; CHECK-LABEL: TEST_mm_mask_testn_epi16_mask: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: vpand %xmm0, %xmm1, %xmm0 +; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 {%k1} +; CHECK-NEXT: kmovd %k0, %eax +; CHECK-NEXT: # kill: %AL %AL %EAX +; CHECK-NEXT: retq +entry: + %and.i.i = and <2 x i64> %__B, %__A + %0 = bitcast <2 x i64> %and.i.i to <8 x i16> + %1 = icmp eq <8 x i16> %0, zeroinitializer + %2 = bitcast i8 %__U to <8 x i1> + %3 = and <8 x i1> %1, %2 + %4 = bitcast <8 x i1> %3 to i8 + ret i8 %4 +} + +; Function Attrs: norecurse nounwind readnone +define i32 @TEST_mm256_test_epi8_mask(<4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 { +; CHECK-LABEL: TEST_mm256_test_epi8_mask: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: vpand %ymm0, %ymm1, %ymm0 +; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 +; CHECK-NEXT: kmovd %k0, %eax +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %and.i.i = and <4 x i64> %__B, %__A + %0 = bitcast <4 x i64> %and.i.i to <32 x i8> + %1 = icmp ne <32 x i8> %0, zeroinitializer + %2 = bitcast <32 x i1> %1 to i32 + ret i32 %2 +} + +; Function Attrs: norecurse nounwind readnone +define i32 @TEST_mm256_mask_test_epi8_mask(i32 %__U, <4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 { +; CHECK-LABEL: TEST_mm256_mask_test_epi8_mask: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: vpand %ymm0, %ymm1, %ymm0 +; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 {%k1} +; CHECK-NEXT: kmovd %k0, %eax +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %and.i.i = and <4 x i64> %__B, %__A + %0 = bitcast <4 x i64> %and.i.i to <32 x i8> + %1 = icmp ne <32 x i8> %0, zeroinitializer + %2 = bitcast i32 %__U to <32 x i1> + %3 = and <32 x i1> %1, %2 + %4 = bitcast <32 x i1> %3 to i32 + ret i32 %4 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i16 @TEST_mm256_test_epi16_mask(<4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 { +; CHECK-LABEL: TEST_mm256_test_epi16_mask: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: vpand %ymm0, %ymm1, %ymm0 +; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vpcmpneqw %ymm1, %ymm0, %k0 +; CHECK-NEXT: kmovd %k0, %eax +; CHECK-NEXT: # kill: %AX %AX %EAX +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %and.i.i = and <4 x i64> %__B, %__A + %0 = bitcast <4 x i64> %and.i.i to <16 x i16> + %1 = icmp ne <16 x i16> %0, zeroinitializer + 
%2 = bitcast <16 x i1> %1 to i16 + ret i16 %2 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i16 @TEST_mm256_mask_test_epi16_mask(i16 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 { +; CHECK-LABEL: TEST_mm256_mask_test_epi16_mask: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: vpand %ymm0, %ymm1, %ymm0 +; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vpcmpneqw %ymm1, %ymm0, %k0 {%k1} +; CHECK-NEXT: kmovd %k0, %eax +; CHECK-NEXT: # kill: %AX %AX %EAX +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %and.i.i = and <4 x i64> %__B, %__A + %0 = bitcast <4 x i64> %and.i.i to <16 x i16> + %1 = icmp ne <16 x i16> %0, zeroinitializer + %2 = bitcast i16 %__U to <16 x i1> + %3 = and <16 x i1> %1, %2 + %4 = bitcast <16 x i1> %3 to i16 + ret i16 %4 +} + +; Function Attrs: norecurse nounwind readnone +define i32 @TEST_mm256_testn_epi8_mask(<4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 { +; CHECK-LABEL: TEST_mm256_testn_epi8_mask: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: vpand %ymm0, %ymm1, %ymm0 +; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 +; CHECK-NEXT: kmovd %k0, %eax +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %and.i.i = and <4 x i64> %__B, %__A + %0 = bitcast <4 x i64> %and.i.i to <32 x i8> + %1 = icmp eq <32 x i8> %0, zeroinitializer + %2 = bitcast <32 x i1> %1 to i32 + ret i32 %2 +} + +; Function Attrs: norecurse nounwind readnone +define i32 @TEST_mm256_mask_testn_epi8_mask(i32 %__U, <4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 { +; CHECK-LABEL: TEST_mm256_mask_testn_epi8_mask: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: vpand %ymm0, %ymm1, %ymm0 +; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 {%k1} +; CHECK-NEXT: kmovd %k0, %eax +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %and.i.i = and <4 x i64> %__B, %__A + %0 = bitcast <4 x i64> %and.i.i to <32 x i8> + %1 = icmp eq <32 x i8> %0, zeroinitializer + %2 = bitcast i32 %__U to <32 x i1> + %3 = and <32 x i1> %1, %2 + %4 = bitcast <32 x i1> %3 to i32 + ret i32 %4 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i16 @TEST_mm256_testn_epi16_mask(<4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 { +; CHECK-LABEL: TEST_mm256_testn_epi16_mask: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: vpand %ymm0, %ymm1, %ymm0 +; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 +; CHECK-NEXT: kmovd %k0, %eax +; CHECK-NEXT: # kill: %AX %AX %EAX +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %and.i.i = and <4 x i64> %__B, %__A + %0 = bitcast <4 x i64> %and.i.i to <16 x i16> + %1 = icmp eq <16 x i16> %0, zeroinitializer + %2 = bitcast <16 x i1> %1 to i16 + ret i16 %2 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i16 @TEST_mm256_mask_testn_epi16_mask(i16 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 { +; CHECK-LABEL: TEST_mm256_mask_testn_epi16_mask: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: vpand %ymm0, %ymm1, %ymm0 +; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 {%k1} +; CHECK-NEXT: kmovd %k0, %eax +; CHECK-NEXT: # kill: %AX %AX %EAX +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %and.i.i = and <4 x i64> %__B, %__A + %0 = bitcast <4 x i64> %and.i.i to <16 x i16> + %1 = icmp eq <16 x i16> %0, zeroinitializer + %2 = bitcast i16 %__U to <16 x i1> + %3 = and <16 x i1> %1, %2 + 
%4 = bitcast <16 x i1> %3 to i16 + ret i16 %4 +} + + diff --git a/test/CodeGen/X86/avx512f-vec-test-testn.ll b/test/CodeGen/X86/avx512f-vec-test-testn.ll new file mode 100644 index 00000000000..c9c0c2251a4 --- /dev/null +++ b/test/CodeGen/X86/avx512f-vec-test-testn.ll @@ -0,0 +1,163 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @TEST_mm512_test_epi64_mask(<8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 { +; CHECK-LABEL: TEST_mm512_test_epi64_mask: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: vpandq %zmm0, %zmm1, %zmm0 +; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vpcmpneqq %zmm1, %zmm0, %k0 +; CHECK-NEXT: kmovw %k0, %eax +; CHECK-NEXT: # kill: %AL %AL %EAX +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %and.i.i = and <8 x i64> %__B, %__A + %0 = icmp ne <8 x i64> %and.i.i, zeroinitializer + %1 = bitcast <8 x i1> %0 to i8 + ret i8 %1 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i16 @TEST_mm512_test_epi32_mask(<8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 { +; CHECK-LABEL: TEST_mm512_test_epi32_mask: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: vpandq %zmm0, %zmm1, %zmm0 +; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vpcmpneqd %zmm1, %zmm0, %k0 +; CHECK-NEXT: kmovw %k0, %eax +; CHECK-NEXT: # kill: %AX %AX %EAX +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %and.i.i = and <8 x i64> %__B, %__A + %0 = bitcast <8 x i64> %and.i.i to <16 x i32> + %1 = icmp ne <16 x i32> %0, zeroinitializer + %2 = bitcast <16 x i1> %1 to i16 + ret i16 %2 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @TEST_mm512_mask_test_epi64_mask(i8 %__U, <8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 { +; CHECK-LABEL: TEST_mm512_mask_test_epi64_mask: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: vpandq %zmm0, %zmm1, %zmm0 +; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vpcmpneqq %zmm1, %zmm0, %k0 {%k1} +; CHECK-NEXT: kmovw %k0, %eax +; CHECK-NEXT: # kill: %AL %AL %EAX +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %and.i.i = and <8 x i64> %__B, %__A + %0 = icmp ne <8 x i64> %and.i.i, zeroinitializer + %1 = bitcast i8 %__U to <8 x i1> + %2 = and <8 x i1> %0, %1 + %3 = bitcast <8 x i1> %2 to i8 + ret i8 %3 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i16 @TEST_mm512_mask_test_epi32_mask(i16 %__U, <8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 { +; CHECK-LABEL: TEST_mm512_mask_test_epi32_mask: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: vpandq %zmm0, %zmm1, %zmm0 +; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vpcmpneqd %zmm1, %zmm0, %k0 {%k1} +; CHECK-NEXT: kmovw %k0, %eax +; CHECK-NEXT: # kill: %AX %AX %EAX +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %and.i.i = and <8 x i64> %__B, %__A + %0 = bitcast <8 x i64> %and.i.i to <16 x i32> + %1 = icmp ne <16 x i32> %0, zeroinitializer + %2 = bitcast i16 %__U to <16 x i1> + %3 = and <16 x i1> %1, %2 + %4 = bitcast <16 x i1> %3 to i16 + ret i16 %4 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @TEST_mm512_testn_epi64_mask(<8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 { +; CHECK-LABEL: TEST_mm512_testn_epi64_mask: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: vpandq %zmm0, %zmm1, %zmm0 +; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, 
%k0 +; CHECK-NEXT: kmovw %k0, %eax +; CHECK-NEXT: # kill: %AL %AL %EAX +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %and.i.i = and <8 x i64> %__B, %__A + %0 = icmp eq <8 x i64> %and.i.i, zeroinitializer + %1 = bitcast <8 x i1> %0 to i8 + ret i8 %1 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i16 @TEST_mm512_testn_epi32_mask(<8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 { +; CHECK-LABEL: TEST_mm512_testn_epi32_mask: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: vpandq %zmm0, %zmm1, %zmm0 +; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 +; CHECK-NEXT: kmovw %k0, %eax +; CHECK-NEXT: # kill: %AX %AX %EAX +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %and.i.i = and <8 x i64> %__B, %__A + %0 = bitcast <8 x i64> %and.i.i to <16 x i32> + %1 = icmp eq <16 x i32> %0, zeroinitializer + %2 = bitcast <16 x i1> %1 to i16 + ret i16 %2 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @TEST_mm512_mask_testn_epi64_mask(i8 %__U, <8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 { +; CHECK-LABEL: TEST_mm512_mask_testn_epi64_mask: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: vpandq %zmm0, %zmm1, %zmm0 +; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} +; CHECK-NEXT: kmovw %k0, %eax +; CHECK-NEXT: # kill: %AL %AL %EAX +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %and.i.i = and <8 x i64> %__B, %__A + %0 = icmp eq <8 x i64> %and.i.i, zeroinitializer + %1 = bitcast i8 %__U to <8 x i1> + %2 = and <8 x i1> %0, %1 + %3 = bitcast <8 x i1> %2 to i8 + ret i8 %3 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i16 @TEST_mm512_mask_testn_epi32_mask(i16 %__U, <8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 { +; CHECK-LABEL: TEST_mm512_mask_testn_epi32_mask: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: vpandq %zmm0, %zmm1, %zmm0 +; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} +; CHECK-NEXT: kmovw %k0, %eax +; CHECK-NEXT: # kill: %AX %AX %EAX +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %and.i.i = and <8 x i64> %__B, %__A + %0 = bitcast <8 x i64> %and.i.i to <16 x i32> + %1 = icmp eq <16 x i32> %0, zeroinitializer + %2 = bitcast i16 %__U to <16 x i1> + %3 = and <16 x i1> %1, %2 + %4 = bitcast <16 x i1> %3 to i16 + ret i16 %4 +} + diff --git a/test/CodeGen/X86/avx512vl-vec-test-testn.ll b/test/CodeGen/X86/avx512vl-vec-test-testn.ll new file mode 100644 index 00000000000..f1919cb118c --- /dev/null +++ b/test/CodeGen/X86/avx512vl-vec-test-testn.ll @@ -0,0 +1,504 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefix=X86_64 +; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefix=I386 + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @TEST_mm_test_epi64_mask(<2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 { +; X86_64-LABEL: TEST_mm_test_epi64_mask: +; X86_64: # BB#0: # %entry +; X86_64-NEXT: vpand %xmm0, %xmm1, %xmm0 +; X86_64-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; X86_64-NEXT: vpcmpneqq %xmm1, %xmm0, %k0 +; X86_64-NEXT: kmovw %k0, %eax +; X86_64-NEXT: # kill: %AL %AL %EAX +; X86_64-NEXT: retq +; +; I386-LABEL: TEST_mm_test_epi64_mask: +; I386: # BB#0: # %entry +; I386-NEXT: vpand %xmm0, %xmm1, %xmm0 +; I386-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; I386-NEXT: vpcmpneqq 
%xmm1, %xmm0, %k0
+; I386-NEXT: kmovw %k0, %eax
+; I386-NEXT: # kill: %AL %AL %EAX
+; I386-NEXT: retl
+entry:
+ %and.i.i = and <2 x i64> %__B, %__A
+ %0 = icmp ne <2 x i64> %and.i.i, zeroinitializer
+ %1 = shufflevector <2 x i1> %0, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %2 = bitcast <8 x i1> %1 to i8
+ ret i8 %2
+}
+
+; Function Attrs: norecurse nounwind readnone
+define zeroext i8 @TEST_mm_test_epi32_mask(<2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 {
+; X86_64-LABEL: TEST_mm_test_epi32_mask:
+; X86_64: # BB#0: # %entry
+; X86_64-NEXT: vpand %xmm0, %xmm1, %xmm0
+; X86_64-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; X86_64-NEXT: vpcmpneqd %xmm1, %xmm0, %k0
+; X86_64-NEXT: kmovw %k0, %eax
+; X86_64-NEXT: # kill: %AL %AL %EAX
+; X86_64-NEXT: retq
+;
+; I386-LABEL: TEST_mm_test_epi32_mask:
+; I386: # BB#0: # %entry
+; I386-NEXT: vpand %xmm0, %xmm1, %xmm0
+; I386-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; I386-NEXT: vpcmpneqd %xmm1, %xmm0, %k0
+; I386-NEXT: kmovw %k0, %eax
+; I386-NEXT: # kill: %AL %AL %EAX
+; I386-NEXT: retl
+entry:
+ %and.i.i = and <2 x i64> %__B, %__A
+ %0 = bitcast <2 x i64> %and.i.i to <4 x i32>
+ %1 = icmp ne <4 x i32> %0, zeroinitializer
+ %2 = shufflevector <4 x i1> %1, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %3 = bitcast <8 x i1> %2 to i8
+ ret i8 %3
+}
+
+; Function Attrs: norecurse nounwind readnone
+define zeroext i8 @TEST_mm256_test_epi64_mask(<4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 {
+; X86_64-LABEL: TEST_mm256_test_epi64_mask:
+; X86_64: # BB#0: # %entry
+; X86_64-NEXT: vpand %ymm0, %ymm1, %ymm0
+; X86_64-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; X86_64-NEXT: vpcmpneqq %ymm1, %ymm0, %k0
+; X86_64-NEXT: kmovw %k0, %eax
+; X86_64-NEXT: # kill: %AL %AL %EAX
+; X86_64-NEXT: vzeroupper
+; X86_64-NEXT: retq
+;
+; I386-LABEL: TEST_mm256_test_epi64_mask:
+; I386: # BB#0: # %entry
+; I386-NEXT: vpand %ymm0, %ymm1, %ymm0
+; I386-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; I386-NEXT: vpcmpneqq %ymm1, %ymm0, %k0
+; I386-NEXT: kmovw %k0, %eax
+; I386-NEXT: # kill: %AL %AL %EAX
+; I386-NEXT: vzeroupper
+; I386-NEXT: retl
+entry:
+ %and.i.i = and <4 x i64> %__B, %__A
+ %0 = icmp ne <4 x i64> %and.i.i, zeroinitializer
+ %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %2 = bitcast <8 x i1> %1 to i8
+ ret i8 %2
+}
+
+; Function Attrs: norecurse nounwind readnone
+define zeroext i8 @TEST_mm256_test_epi32_mask(<4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 {
+; X86_64-LABEL: TEST_mm256_test_epi32_mask:
+; X86_64: # BB#0: # %entry
+; X86_64-NEXT: vpand %ymm0, %ymm1, %ymm0
+; X86_64-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; X86_64-NEXT: vpcmpneqd %ymm1, %ymm0, %k0
+; X86_64-NEXT: kmovw %k0, %eax
+; X86_64-NEXT: # kill: %AL %AL %EAX
+; X86_64-NEXT: vzeroupper
+; X86_64-NEXT: retq
+;
+; I386-LABEL: TEST_mm256_test_epi32_mask:
+; I386: # BB#0: # %entry
+; I386-NEXT: vpand %ymm0, %ymm1, %ymm0
+; I386-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; I386-NEXT: vpcmpneqd %ymm1, %ymm0, %k0
+; I386-NEXT: kmovw %k0, %eax
+; I386-NEXT: # kill: %AL %AL %EAX
+; I386-NEXT: vzeroupper
+; I386-NEXT: retl
+entry:
+ %and.i.i = and <4 x i64> %__B, %__A
+ %0 = bitcast <4 x i64> %and.i.i to <8 x i32>
+ %1 = icmp ne <8 x i32> %0, zeroinitializer
+ %2 = bitcast <8 x i1> %1 to i8
+ ret i8 %2
+}
+
+; Function Attrs: norecurse nounwind readnone
+define zeroext i8 @TEST_mm_mask_test_epi64_mask(i8 %__U, <2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 {
+; X86_64-LABEL: TEST_mm_mask_test_epi64_mask:
+; X86_64: # BB#0: # %entry
+; X86_64-NEXT: vpand %xmm0, %xmm1, %xmm0
+; X86_64-NEXT: vpxor %xmm1, %xmm1, %xmm1 
+; X86_64-NEXT: kmovw %edi, %k1
+; X86_64-NEXT: vpcmpneqq %xmm1, %xmm0, %k0 {%k1}
+; X86_64-NEXT: kmovw %k0, %eax
+; X86_64-NEXT: # kill: %AL %AL %EAX
+; X86_64-NEXT: retq
+;
+; I386-LABEL: TEST_mm_mask_test_epi64_mask:
+; I386: # BB#0: # %entry
+; I386-NEXT: vpand %xmm0, %xmm1, %xmm0
+; I386-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; I386-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; I386-NEXT: kmovw %eax, %k1
+; I386-NEXT: vpcmpneqq %xmm1, %xmm0, %k0 {%k1}
+; I386-NEXT: kmovw %k0, %eax
+; I386-NEXT: # kill: %AL %AL %EAX
+; I386-NEXT: retl
+entry:
+ %and.i.i = and <2 x i64> %__B, %__A
+ %0 = icmp ne <2 x i64> %and.i.i, zeroinitializer
+ %1 = bitcast i8 %__U to <8 x i1>
+ %2 = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ %3 = and <2 x i1> %0, %2
+ %4 = shufflevector <2 x i1> %3, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %5 = bitcast <8 x i1> %4 to i8
+ ret i8 %5
+}
+
+; Function Attrs: norecurse nounwind readnone
+define zeroext i8 @TEST_mm_mask_test_epi32_mask(i8 %__U, <2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 {
+; X86_64-LABEL: TEST_mm_mask_test_epi32_mask:
+; X86_64: # BB#0: # %entry
+; X86_64-NEXT: vpand %xmm0, %xmm1, %xmm0
+; X86_64-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; X86_64-NEXT: kmovw %edi, %k1
+; X86_64-NEXT: vpcmpneqd %xmm1, %xmm0, %k0 {%k1}
+; X86_64-NEXT: kmovw %k0, %eax
+; X86_64-NEXT: # kill: %AL %AL %EAX
+; X86_64-NEXT: retq
+;
+; I386-LABEL: TEST_mm_mask_test_epi32_mask:
+; I386: # BB#0: # %entry
+; I386-NEXT: vpand %xmm0, %xmm1, %xmm0
+; I386-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; I386-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; I386-NEXT: kmovw %eax, %k1
+; I386-NEXT: vpcmpneqd %xmm1, %xmm0, %k0 {%k1}
+; I386-NEXT: kmovw %k0, %eax
+; I386-NEXT: # kill: %AL %AL %EAX
+; I386-NEXT: retl
+entry:
+ %and.i.i = and <2 x i64> %__B, %__A
+ %0 = bitcast <2 x i64> %and.i.i to <4 x i32>
+ %1 = icmp ne <4 x i32> %0, zeroinitializer
+ %2 = bitcast i8 %__U to <8 x i1>
+ %3 = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %1, %3
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <8 x i1> %5 to i8
+ ret i8 %6
+}
+
+
+; Function Attrs: norecurse nounwind readnone
+define zeroext i8 @TEST_mm256_mask_test_epi64_mask(i8 %__U, <4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 {
+; X86_64-LABEL: TEST_mm256_mask_test_epi64_mask:
+; X86_64: # BB#0: # %entry
+; X86_64-NEXT: vpand %ymm0, %ymm1, %ymm0
+; X86_64-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; X86_64-NEXT: kmovw %edi, %k1
+; X86_64-NEXT: vpcmpneqq %ymm1, %ymm0, %k0 {%k1}
+; X86_64-NEXT: kmovw %k0, %eax
+; X86_64-NEXT: # kill: %AL %AL %EAX
+; X86_64-NEXT: vzeroupper
+; X86_64-NEXT: retq
+;
+; I386-LABEL: TEST_mm256_mask_test_epi64_mask:
+; I386: # BB#0: # %entry
+; I386-NEXT: vpand %ymm0, %ymm1, %ymm0
+; I386-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; I386-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; I386-NEXT: kmovw %eax, %k1
+; I386-NEXT: vpcmpneqq %ymm1, %ymm0, %k0 {%k1}
+; I386-NEXT: kmovw %k0, %eax
+; I386-NEXT: # kill: %AL %AL %EAX
+; I386-NEXT: vzeroupper
+; I386-NEXT: retl
+entry:
+ %and.i.i = and <4 x i64> %__B, %__A
+ %0 = icmp ne <4 x i64> %and.i.i, zeroinitializer
+ %1 = bitcast i8 %__U to <8 x i1>
+ %2 = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %3 = and <4 x i1> %0, %2
+ %4 = shufflevector <4 x i1> %3, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %5 = bitcast <8 x i1> %4 to i8
+ ret i8 %5
+}
+
+; Function Attrs: norecurse nounwind readnone
+define zeroext i8 @TEST_mm256_mask_test_epi32_mask(i8 %__U, <4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 {
+; X86_64-LABEL: 
TEST_mm256_mask_test_epi32_mask:
+; X86_64: # BB#0: # %entry
+; X86_64-NEXT: vpand %ymm0, %ymm1, %ymm0
+; X86_64-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; X86_64-NEXT: kmovw %edi, %k1
+; X86_64-NEXT: vpcmpneqd %ymm1, %ymm0, %k0 {%k1}
+; X86_64-NEXT: kmovw %k0, %eax
+; X86_64-NEXT: # kill: %AL %AL %EAX
+; X86_64-NEXT: vzeroupper
+; X86_64-NEXT: retq
+;
+; I386-LABEL: TEST_mm256_mask_test_epi32_mask:
+; I386: # BB#0: # %entry
+; I386-NEXT: vpand %ymm0, %ymm1, %ymm0
+; I386-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; I386-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; I386-NEXT: kmovw %eax, %k1
+; I386-NEXT: vpcmpneqd %ymm1, %ymm0, %k0 {%k1}
+; I386-NEXT: kmovw %k0, %eax
+; I386-NEXT: # kill: %AL %AL %EAX
+; I386-NEXT: vzeroupper
+; I386-NEXT: retl
+entry:
+ %and.i.i = and <4 x i64> %__B, %__A
+ %0 = bitcast <4 x i64> %and.i.i to <8 x i32>
+ %1 = icmp ne <8 x i32> %0, zeroinitializer
+ %2 = bitcast i8 %__U to <8 x i1>
+ %3 = and <8 x i1> %1, %2
+ %4 = bitcast <8 x i1> %3 to i8
+ ret i8 %4
+}
+
+; Function Attrs: norecurse nounwind readnone
+define zeroext i8 @TEST_mm_testn_epi64_mask(<2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 {
+; X86_64-LABEL: TEST_mm_testn_epi64_mask:
+; X86_64: # BB#0: # %entry
+; X86_64-NEXT: vpand %xmm0, %xmm1, %xmm0
+; X86_64-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; X86_64-NEXT: vpcmpeqq %xmm1, %xmm0, %k0
+; X86_64-NEXT: kmovw %k0, %eax
+; X86_64-NEXT: # kill: %AL %AL %EAX
+; X86_64-NEXT: retq
+;
+; I386-LABEL: TEST_mm_testn_epi64_mask:
+; I386: # BB#0: # %entry
+; I386-NEXT: vpand %xmm0, %xmm1, %xmm0
+; I386-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; I386-NEXT: vpcmpeqq %xmm1, %xmm0, %k0
+; I386-NEXT: kmovw %k0, %eax
+; I386-NEXT: # kill: %AL %AL %EAX
+; I386-NEXT: retl
+entry:
+ %and.i.i = and <2 x i64> %__B, %__A
+ %0 = icmp eq <2 x i64> %and.i.i, zeroinitializer
+ %1 = shufflevector <2 x i1> %0, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %2 = bitcast <8 x i1> %1 to i8
+ ret i8 %2
+}
+
+; Function Attrs: norecurse nounwind readnone
+define zeroext i8 @TEST_mm_testn_epi32_mask(<2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 {
+; X86_64-LABEL: TEST_mm_testn_epi32_mask:
+; X86_64: # BB#0: # %entry
+; X86_64-NEXT: vpand %xmm0, %xmm1, %xmm0
+; X86_64-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; X86_64-NEXT: vpcmpeqd %xmm1, %xmm0, %k0
+; X86_64-NEXT: kmovw %k0, %eax
+; X86_64-NEXT: # kill: %AL %AL %EAX
+; X86_64-NEXT: retq
+;
+; I386-LABEL: TEST_mm_testn_epi32_mask:
+; I386: # BB#0: # %entry
+; I386-NEXT: vpand %xmm0, %xmm1, %xmm0
+; I386-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; I386-NEXT: vpcmpeqd %xmm1, %xmm0, %k0
+; I386-NEXT: kmovw %k0, %eax
+; I386-NEXT: # kill: %AL %AL %EAX
+; I386-NEXT: retl
+entry:
+ %and.i.i = and <2 x i64> %__B, %__A
+ %0 = bitcast <2 x i64> %and.i.i to <4 x i32>
+ %1 = icmp eq <4 x i32> %0, zeroinitializer
+ %2 = shufflevector <4 x i1> %1, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %3 = bitcast <8 x i1> %2 to i8
+ ret i8 %3
+}
+
+; Function Attrs: norecurse nounwind readnone
+define zeroext i8 @TEST_mm256_testn_epi64_mask(<4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 {
+; X86_64-LABEL: TEST_mm256_testn_epi64_mask:
+; X86_64: # BB#0: # %entry
+; X86_64-NEXT: vpand %ymm0, %ymm1, %ymm0
+; X86_64-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; X86_64-NEXT: vpcmpeqq %ymm1, %ymm0, %k0
+; X86_64-NEXT: kmovw %k0, %eax
+; X86_64-NEXT: # kill: %AL %AL %EAX
+; X86_64-NEXT: vzeroupper
+; X86_64-NEXT: retq
+;
+; I386-LABEL: TEST_mm256_testn_epi64_mask:
+; I386: # BB#0: # %entry
+; I386-NEXT: vpand %ymm0, %ymm1, %ymm0
+; I386-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; I386-NEXT: vpcmpeqq %ymm1, %ymm0, %k0
+; 
I386-NEXT: kmovw %k0, %eax
+; I386-NEXT: # kill: %AL %AL %EAX
+; I386-NEXT: vzeroupper
+; I386-NEXT: retl
+entry:
+ %and.i.i = and <4 x i64> %__B, %__A
+ %0 = icmp eq <4 x i64> %and.i.i, zeroinitializer
+ %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %2 = bitcast <8 x i1> %1 to i8
+ ret i8 %2
+}
+
+; Function Attrs: norecurse nounwind readnone
+define zeroext i8 @TEST_mm256_testn_epi32_mask(<4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 {
+; X86_64-LABEL: TEST_mm256_testn_epi32_mask:
+; X86_64: # BB#0: # %entry
+; X86_64-NEXT: vpand %ymm0, %ymm1, %ymm0
+; X86_64-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; X86_64-NEXT: vpcmpeqd %ymm1, %ymm0, %k0
+; X86_64-NEXT: kmovw %k0, %eax
+; X86_64-NEXT: # kill: %AL %AL %EAX
+; X86_64-NEXT: vzeroupper
+; X86_64-NEXT: retq
+;
+; I386-LABEL: TEST_mm256_testn_epi32_mask:
+; I386: # BB#0: # %entry
+; I386-NEXT: vpand %ymm0, %ymm1, %ymm0
+; I386-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; I386-NEXT: vpcmpeqd %ymm1, %ymm0, %k0
+; I386-NEXT: kmovw %k0, %eax
+; I386-NEXT: # kill: %AL %AL %EAX
+; I386-NEXT: vzeroupper
+; I386-NEXT: retl
+entry:
+ %and.i.i = and <4 x i64> %__B, %__A
+ %0 = bitcast <4 x i64> %and.i.i to <8 x i32>
+ %1 = icmp eq <8 x i32> %0, zeroinitializer
+ %2 = bitcast <8 x i1> %1 to i8
+ ret i8 %2
+}
+
+; Function Attrs: norecurse nounwind readnone
+define zeroext i8 @TEST_mm_mask_testn_epi64_mask(i8 %__U, <2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 {
+; X86_64-LABEL: TEST_mm_mask_testn_epi64_mask:
+; X86_64: # BB#0: # %entry
+; X86_64-NEXT: vpand %xmm0, %xmm1, %xmm0
+; X86_64-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; X86_64-NEXT: kmovw %edi, %k1
+; X86_64-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1}
+; X86_64-NEXT: kmovw %k0, %eax
+; X86_64-NEXT: # kill: %AL %AL %EAX
+; X86_64-NEXT: retq
+;
+; I386-LABEL: TEST_mm_mask_testn_epi64_mask:
+; I386: # BB#0: # %entry
+; I386-NEXT: vpand %xmm0, %xmm1, %xmm0
+; I386-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; I386-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; I386-NEXT: kmovw %eax, %k1
+; I386-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1}
+; I386-NEXT: kmovw %k0, %eax
+; I386-NEXT: # kill: %AL %AL %EAX
+; I386-NEXT: retl
+entry:
+ %and.i.i = and <2 x i64> %__B, %__A
+ %0 = icmp eq <2 x i64> %and.i.i, zeroinitializer
+ %1 = bitcast i8 %__U to <8 x i1>
+ %2 = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ %3 = and <2 x i1> %0, %2
+ %4 = shufflevector <2 x i1> %3, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %5 = bitcast <8 x i1> %4 to i8
+ ret i8 %5
+}
+
+; Function Attrs: norecurse nounwind readnone
+define zeroext i8 @TEST_mm_mask_testn_epi32_mask(i8 %__U, <2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 {
+; X86_64-LABEL: TEST_mm_mask_testn_epi32_mask:
+; X86_64: # BB#0: # %entry
+; X86_64-NEXT: vpand %xmm0, %xmm1, %xmm0
+; X86_64-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; X86_64-NEXT: kmovw %edi, %k1
+; X86_64-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1}
+; X86_64-NEXT: kmovw %k0, %eax
+; X86_64-NEXT: # kill: %AL %AL %EAX
+; X86_64-NEXT: retq
+;
+; I386-LABEL: TEST_mm_mask_testn_epi32_mask:
+; I386: # BB#0: # %entry
+; I386-NEXT: vpand %xmm0, %xmm1, %xmm0
+; I386-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; I386-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; I386-NEXT: kmovw %eax, %k1
+; I386-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1}
+; I386-NEXT: kmovw %k0, %eax
+; I386-NEXT: # kill: %AL %AL %EAX
+; I386-NEXT: retl
+entry:
+ %and.i.i = and <2 x i64> %__B, %__A
+ %0 = bitcast <2 x i64> %and.i.i to <4 x i32>
+ %1 = icmp eq <4 x i32> %0, zeroinitializer
+ %2 = bitcast i8 %__U to <8 x i1>
+ %3 = shufflevector <8 x i1> %2, <8 x i1> undef, <4 
x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %1, %3
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <8 x i1> %5 to i8
+ ret i8 %6
+}
+
+
+; Function Attrs: norecurse nounwind readnone
+define zeroext i8 @TEST_mm256_mask_testn_epi64_mask(i8 %__U, <4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 {
+; X86_64-LABEL: TEST_mm256_mask_testn_epi64_mask:
+; X86_64: # BB#0: # %entry
+; X86_64-NEXT: vpand %ymm0, %ymm1, %ymm0
+; X86_64-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; X86_64-NEXT: kmovw %edi, %k1
+; X86_64-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1}
+; X86_64-NEXT: kmovw %k0, %eax
+; X86_64-NEXT: # kill: %AL %AL %EAX
+; X86_64-NEXT: vzeroupper
+; X86_64-NEXT: retq
+;
+; I386-LABEL: TEST_mm256_mask_testn_epi64_mask:
+; I386: # BB#0: # %entry
+; I386-NEXT: vpand %ymm0, %ymm1, %ymm0
+; I386-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; I386-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; I386-NEXT: kmovw %eax, %k1
+; I386-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1}
+; I386-NEXT: kmovw %k0, %eax
+; I386-NEXT: # kill: %AL %AL %EAX
+; I386-NEXT: vzeroupper
+; I386-NEXT: retl
+entry:
+ %and.i.i = and <4 x i64> %__B, %__A
+ %0 = icmp eq <4 x i64> %and.i.i, zeroinitializer
+ %1 = bitcast i8 %__U to <8 x i1>
+ %2 = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %3 = and <4 x i1> %0, %2
+ %4 = shufflevector <4 x i1> %3, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %5 = bitcast <8 x i1> %4 to i8
+ ret i8 %5
+}
+
+; Function Attrs: norecurse nounwind readnone
+define zeroext i8 @TEST_mm256_mask_testn_epi32_mask(i8 %__U, <4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 {
+; X86_64-LABEL: TEST_mm256_mask_testn_epi32_mask:
+; X86_64: # BB#0: # %entry
+; X86_64-NEXT: vpand %ymm0, %ymm1, %ymm0
+; X86_64-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; X86_64-NEXT: kmovw %edi, %k1
+; X86_64-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1}
+; X86_64-NEXT: kmovw %k0, %eax
+; X86_64-NEXT: # kill: %AL %AL %EAX
+; X86_64-NEXT: vzeroupper
+; X86_64-NEXT: retq
+;
+; I386-LABEL: TEST_mm256_mask_testn_epi32_mask:
+; I386: # BB#0: # %entry
+; I386-NEXT: vpand %ymm0, %ymm1, %ymm0
+; I386-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; I386-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; I386-NEXT: kmovw %eax, %k1
+; I386-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1}
+; I386-NEXT: kmovw %k0, %eax
+; I386-NEXT: # kill: %AL %AL %EAX
+; I386-NEXT: vzeroupper
+; I386-NEXT: retl
+entry:
+ %and.i.i = and <4 x i64> %__B, %__A
+ %0 = bitcast <4 x i64> %and.i.i to <8 x i32>
+ %1 = icmp eq <8 x i32> %0, zeroinitializer
+ %2 = bitcast i8 %__U to <8 x i1>
+ %3 = and <8 x i1> %1, %2
+ %4 = bitcast <8 x i1> %3 to i8
+ ret i8 %4
+}
+
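For reference, the improvement these tests are staged to demonstrate: as the CHECK lines above show, each test/testn intrinsic currently lowers to an AND (vpand/vpandq), a zero idiom (vpxor), and a compare against zero (vpcmpneq*/vpcmpeq*). A pattern that selects TESTM for "(and x, y) != 0" and TESTNM for "(and x, y) == 0" should collapse that sequence into a single mask-producing instruction. Below is a minimal sketch of the expected output for two of the functions above, where vptestmw sets k0[i] = (src1[i] & src2[i]) != 0 and vptestnmw sets k0[i] = (src1[i] & src2[i]) == 0. This is an assumption about the follow-up commit, not output checked in here; once the pattern lands, the actual assertions would be regenerated with utils/update_llc_test_checks.py:

; Expected codegen after the TESTM/TESTNM pattern (assumed, not committed):
;
; CHECK-LABEL: TEST_mm512_test_epi16_mask:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: vptestmw %zmm0, %zmm1, %k0
; CHECK-NEXT: kmovd %k0, %eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
;
; CHECK-LABEL: TEST_mm512_testn_epi16_mask:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: vptestnmw %zmm0, %zmm1, %k0
; CHECK-NEXT: kmovd %k0, %eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq

The masked variants should fold the same way, presumably keeping the kmov of %__U into %k1 and applying it as a write mask on the vptestm/vptestnm itself, e.g. "vptestmw %zmm0, %zmm1, %k0 {%k1}".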