From fac495754d91e70a7f53a42ff27ba5f600d8b1bf Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Tue, 17 Jan 2017 15:02:01 +0000
Subject: [PATCH] [X86][SSE] Tests showing horizontal all_of/any_of of vector
 comparison results

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@292223 91177308-0d34-0410-b5e6-96231b3b80d8
---
 test/CodeGen/X86/vector-compare-all_of.ll | 801 ++++++++++++++++++++++
 test/CodeGen/X86/vector-compare-any_of.ll | 801 ++++++++++++++++++++++
 2 files changed, 1602 insertions(+)
 create mode 100644 test/CodeGen/X86/vector-compare-all_of.ll
 create mode 100644 test/CodeGen/X86/vector-compare-any_of.ll

diff --git a/test/CodeGen/X86/vector-compare-all_of.ll b/test/CodeGen/X86/vector-compare-all_of.ll
new file mode 100644
index 00000000000..57cc295a805
--- /dev/null
+++ b/test/CodeGen/X86/vector-compare-all_of.ll
@@ -0,0 +1,801 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=SSE
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
+
+define i64 @test_v2f64(<2 x double> %a0, <2 x double> %a1) {
+; SSE-LABEL: test_v2f64:
+; SSE:       # BB#0:
+; SSE-NEXT:    cmpltpd %xmm0, %xmm1
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
+; SSE-NEXT:    pand %xmm1, %xmm0
+; SSE-NEXT:    movd %xmm0, %rax
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_v2f64:
+; AVX:       # BB#0:
+; AVX-NEXT:    vcmpltpd %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
+; AVX-NEXT:    vandpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vmovq %xmm0, %rax
+; AVX-NEXT:    retq
+  %c = fcmp ogt <2 x double> %a0, %a1
+  %s = sext <2 x i1> %c to <2 x i64>
+  %1 = shufflevector <2 x i64> %s, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
+  %2 = and <2 x i64> %s, %1
+  %3 = extractelement <2 x i64> %2, i32 0
+  ret i64 %3
+}
+
+define i64 @test_v4f64(<4 x double> %a0, <4 x double> %a1) {
+; SSE-LABEL: test_v4f64:
+; SSE:       # BB#0:
+; SSE-NEXT:    cmpltpd %xmm1, %xmm3
+; SSE-NEXT:    cmpltpd %xmm0, %xmm2
+; SSE-NEXT:    andpd %xmm3, %xmm2
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
+; SSE-NEXT:    pand %xmm2, %xmm0
+; SSE-NEXT:    movd %xmm0, %rax
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: test_v4f64:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vcmpltpd %ymm0, %ymm1, %ymm0
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vandpd %ymm1, %ymm0, %ymm0
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX1-NEXT:    vandpd %ymm1, %ymm0, %ymm0
+; AVX1-NEXT:    vmovq %xmm0, %rax
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: test_v4f64:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vcmpltpd %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX2-NEXT:    vandpd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vmovq %xmm0, %rax
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+  %c = fcmp ogt <4 x double> %a0, %a1
+  %s = sext <4 x i1> %c to <4 x i64>
+  %1 = shufflevector <4 x i64> %s, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+  %2 = and <4 x i64> %s, %1
+  %3 = shufflevector <4 x i64> %2, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+  %4 = and <4 x i64> %2, %3
+  %5 = extractelement <4 x i64> %4, i64 0
+  ret i64 %5
+}
+
+define i64 @test_v4f64_legal(<4 x double> %a0, <4 x double> %a1) {
+; SSE-LABEL: test_v4f64_legal:
+; SSE:       # BB#0:
+; SSE-NEXT:    cmpltpd %xmm1, %xmm3
+; SSE-NEXT:    cmpltpd %xmm0, %xmm2
+; SSE-NEXT:    packsswb %xmm3, %xmm2
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
+; SSE-NEXT:    pand %xmm2, %xmm0
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; SSE-NEXT:    pand %xmm0, %xmm1
+; SSE-NEXT:    movd %xmm1, %eax
+; SSE-NEXT:    cltq
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_v4f64_legal:
+; AVX:       # BB#0:
+; AVX-NEXT:    vcmpltpd %ymm0, %ymm1, %ymm0
+; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vmovd %xmm0, %eax
+; AVX-NEXT:    cltq
+; AVX-NEXT:    vzeroupper
+; AVX-NEXT:    retq
+  %c = fcmp ogt <4 x double> %a0, %a1
+  %s = sext <4 x i1> %c to <4 x i32>
+  %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+  %2 = and <4 x i32> %s, %1
+  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+  %4 = and <4 x i32> %2, %3
+  %5 = extractelement <4 x i32> %4, i64 0
+  %6 = sext i32 %5 to i64
+  ret i64 %6
+}
+
+define i32 @test_v4f32(<4 x float> %a0, <4 x float> %a1) {
+; SSE-LABEL: test_v4f32:
+; SSE:       # BB#0:
+; SSE-NEXT:    cmpltps %xmm0, %xmm1
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
+; SSE-NEXT:    pand %xmm1, %xmm0
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; SSE-NEXT:    pand %xmm0, %xmm1
+; SSE-NEXT:    movd %xmm1, %eax
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_v4f32:
+; AVX:       # BB#0:
+; AVX-NEXT:    vcmpltps %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
+; AVX-NEXT:    vandpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vmovd %xmm0, %eax
+; AVX-NEXT:    retq
+  %c = fcmp ogt <4 x float> %a0, %a1
+  %s = sext <4 x i1> %c to <4 x i32>
+  %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+  %2 = and <4 x i32> %s, %1
+  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+  %4 = and <4 x i32> %2, %3
+  %5 = extractelement <4 x i32> %4, i32 0
+  ret i32 %5
+}
+
+define i32 @test_v8f32(<8 x float> %a0, <8 x float> %a1) {
+; SSE-LABEL: test_v8f32:
+; SSE:       # BB#0:
+; SSE-NEXT:    cmpltps %xmm1, %xmm3
+; SSE-NEXT:    cmpltps %xmm0, %xmm2
+; SSE-NEXT:    andps %xmm3, %xmm2
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
+; SSE-NEXT:    pand %xmm2, %xmm0
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; SSE-NEXT:    pand %xmm0, %xmm1
+; SSE-NEXT:    movd %xmm1, %eax
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: test_v8f32:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vcmpltps %ymm0, %ymm1, %ymm0
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT:    vmovd %xmm0, %eax
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: test_v8f32:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vcmpltps %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX2-NEXT:    vandps %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vmovd %xmm0, %eax
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+  %c = fcmp ogt <8 x float> %a0, %a1
+  %s = sext <8 x i1> %c to <8 x i32>
+  %1 = shufflevector <8 x i32> %s, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
+  %2 = and <8 x i32> %s, %1
+  %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %4 = and <8 x i32> %2, %3
+  %5 = shufflevector <8 x i32> %4, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %6 = and <8 x i32> %4, %5
+  %7 = extractelement <8 x i32> %6, i32 0
+  ret i32 %7
+}
+
+define i32 @test_v8f32_legal(<8 x float> %a0, <8 x float> %a1) {
+; SSE-LABEL: test_v8f32_legal:
+; SSE:       # BB#0:
+; SSE-NEXT:    cmpltps %xmm1, %xmm3
+; SSE-NEXT:    cmpltps %xmm0, %xmm2
+; SSE-NEXT:    packsswb %xmm3, %xmm2
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
+; SSE-NEXT:    pand %xmm2, %xmm0
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; SSE-NEXT:    pand %xmm0, %xmm1
+; SSE-NEXT:    movdqa %xmm1, %xmm0
+; SSE-NEXT:    psrld $16, %xmm0
+; SSE-NEXT:    pand %xmm1, %xmm0
+; SSE-NEXT:    movd %xmm0, %eax
+; SSE-NEXT:    cwtl
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_v8f32_legal:
+; AVX:       # BB#0:
+; AVX-NEXT:    vcmpltps %ymm0, %ymm1, %ymm0
+; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpsrld $16, %xmm0, %xmm1
+; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vmovd %xmm0, %eax
+; AVX-NEXT:    cwtl
+; AVX-NEXT:    vzeroupper
+; AVX-NEXT:    retq
+  %c = fcmp ogt <8 x float> %a0, %a1
+  %s = sext <8 x i1> %c to <8 x i16>
+  %1 = shufflevector <8 x i16> %s, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
+  %2 = and <8 x i16> %s, %1
+  %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %4 = and <8 x i16> %2, %3
+  %5 = shufflevector <8 x i16> %4, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %6 = and <8 x i16> %4, %5
+  %7 = extractelement <8 x i16> %6, i32 0
+  %8 = sext i16 %7 to i32
+  ret i32 %8
+}
+
+define i64 @test_v2i64(<2 x i64> %a0, <2 x i64> %a1) {
+; SSE-LABEL: test_v2i64:
+; SSE:       # BB#0:
+; SSE-NEXT:    pcmpgtq %xmm1, %xmm0
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE-NEXT:    pand %xmm0, %xmm1
+; SSE-NEXT:    movd %xmm1, %rax
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_v2i64:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vmovq %xmm0, %rax
+; AVX-NEXT:    retq
+  %c = icmp sgt <2 x i64> %a0, %a1
+  %s = sext <2 x i1> %c to <2 x i64>
+  %1 = shufflevector <2 x i64> %s, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
+  %2 = and <2 x i64> %s, %1
+  %3 = extractelement <2 x i64> %2, i32 0
+  ret i64 %3
+}
+
+define i64 @test_v4i64(<4 x i64> %a0, <4 x i64> %a1) {
+; SSE-LABEL: test_v4i64:
+; SSE:       # BB#0:
+; SSE-NEXT:    pcmpgtq %xmm3, %xmm1
+; SSE-NEXT:    pcmpgtq %xmm2, %xmm0
+; SSE-NEXT:    pand %xmm1, %xmm0
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE-NEXT:    pand %xmm0, %xmm1
+; SSE-NEXT:    movd %xmm1, %rax
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: test_v4i64:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT:    vmovq %xmm0, %rax
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: test_v4i64:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vmovq %xmm0, %rax
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+  %c = icmp sgt <4 x i64> %a0, %a1
+  %s = sext <4 x i1> %c to <4 x i64>
+  %1 = shufflevector <4 x i64> %s, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+  %2 = and <4 x i64> %s, %1
+  %3 = shufflevector <4 x i64> %2, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+  %4 = and <4 x i64> %2, %3
+  %5 = extractelement <4 x i64> %4, i64 0
+  ret i64 %5
+}
+
+define i64 @test_v4i64_legal(<4 x i64> %a0, <4 x i64> %a1) {
+; SSE-LABEL: test_v4i64_legal:
+; SSE:       # BB#0:
+; SSE-NEXT:    pcmpgtq %xmm3, %xmm1
+; SSE-NEXT:    pcmpgtq %xmm2, %xmm0
+; SSE-NEXT:    packsswb %xmm1, %xmm0
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE-NEXT:    pand %xmm0, %xmm1
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; SSE-NEXT:    pand %xmm1, %xmm0
+; SSE-NEXT:    movd %xmm0, %eax
+; SSE-NEXT:    cltq
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: test_v4i64_legal:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpacksswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vmovd %xmm0, %eax
+; AVX1-NEXT:    cltq
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: test_v4i64_legal:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vmovd %xmm0, %eax
+; AVX2-NEXT:    cltq
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+  %c = icmp sgt <4 x i64> %a0, %a1
+  %s = sext <4 x i1> %c to <4 x i32>
+  %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+  %2 = and <4 x i32> %s, %1
+  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+  %4 = and <4 x i32> %2, %3
+  %5 = extractelement <4 x i32> %4, i64 0
+  %6 = sext i32 %5 to i64
+  ret i64 %6
+}
+
+define i32 @test_v4i32(<4 x i32> %a0, <4 x i32> %a1) {
+; SSE-LABEL: test_v4i32:
+; SSE:       # BB#0:
+; SSE-NEXT:    pcmpgtd %xmm1, %xmm0
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE-NEXT:    pand %xmm0, %xmm1
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; SSE-NEXT:    pand %xmm1, %xmm0
+; SSE-NEXT:    movd %xmm0, %eax
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_v4i32:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vmovd %xmm0, %eax
+; AVX-NEXT:    retq
+  %c = icmp sgt <4 x i32> %a0, %a1
+  %s = sext <4 x i1> %c to <4 x i32>
+  %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+  %2 = and <4 x i32> %s, %1
+  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+  %4 = and <4 x i32> %2, %3
+  %5 = extractelement <4 x i32> %4, i32 0
+  ret i32 %5
+}
+
+define i32 @test_v8i32(<8 x i32> %a0, <8 x i32> %a1) {
+; SSE-LABEL: test_v8i32:
+; SSE:       # BB#0:
+; SSE-NEXT:    pcmpgtd %xmm3, %xmm1
+; SSE-NEXT:    pcmpgtd %xmm2, %xmm0
+; SSE-NEXT:    pand %xmm1, %xmm0
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE-NEXT:    pand %xmm0, %xmm1
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; SSE-NEXT:    pand %xmm1, %xmm0
+; SSE-NEXT:    movd %xmm0, %eax
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: test_v8i32:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT:    vmovd %xmm0, %eax
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: test_v8i32:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vmovd %xmm0, %eax
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+  %c = icmp sgt <8 x i32> %a0, %a1
+  %s = sext <8 x i1> %c to <8 x i32>
+  %1 = shufflevector <8 x i32> %s, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
+  %2 = and <8 x i32> %s, %1
+  %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %4 = and <8 x i32> %2, %3
+  %5 = shufflevector <8 x i32> %4, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %6 = and <8 x i32> %4, %5
+  %7 = extractelement <8 x i32> %6, i32 0
+  ret i32 %7
+}
+
+define i32 @test_v8i32_legal(<8 x i32> %a0, <8 x i32> %a1) {
+; SSE-LABEL: test_v8i32_legal:
+; SSE:       # BB#0:
+; SSE-NEXT:    pcmpgtd %xmm3, %xmm1
+; SSE-NEXT:    pcmpgtd %xmm2, %xmm0
+; SSE-NEXT:    packsswb %xmm1, %xmm0
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE-NEXT:    pand %xmm0, %xmm1
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; SSE-NEXT:    pand %xmm1, %xmm0
+; SSE-NEXT:    movdqa %xmm0, %xmm1
+; SSE-NEXT:    psrld $16, %xmm1
+; SSE-NEXT:    pand %xmm0, %xmm1
+; SSE-NEXT:    movd %xmm1, %eax
+; SSE-NEXT:    cwtl
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: test_v8i32_legal:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpacksswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm1
+; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vmovd %xmm0, %eax
+; AVX1-NEXT:    cwtl
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: test_v8i32_legal:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpsrld $16, %xmm0, %xmm1
+; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vmovd %xmm0, %eax
+; AVX2-NEXT:    cwtl
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+  %c = icmp sgt <8 x i32> %a0, %a1
+  %s = sext <8 x i1> %c to <8 x i16>
+  %1 = shufflevector <8 x i16> %s, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
+  %2 = and <8 x i16> %s, %1
+  %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %4 = and <8 x i16> %2, %3
+  %5 = shufflevector <8 x i16> %4, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %6 = and <8 x i16> %4, %5
+  %7 = extractelement <8 x i16> %6, i32 0
+  %8 = sext i16 %7 to i32
+  ret i32 %8
+}
+
+define i16 @test_v8i16(<8 x i16> %a0, <8 x i16> %a1) {
+; SSE-LABEL: test_v8i16:
+; SSE:       # BB#0:
+; SSE-NEXT:    pcmpgtw %xmm1, %xmm0
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE-NEXT:    pand %xmm0, %xmm1
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; SSE-NEXT:    pand %xmm1, %xmm0
+; SSE-NEXT:    movdqa %xmm0, %xmm1
+; SSE-NEXT:    psrld $16, %xmm1
+; SSE-NEXT:    pand %xmm0, %xmm1
+; SSE-NEXT:    movd %xmm1, %eax
+; SSE-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_v8i16:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpsrld $16, %xmm0, %xmm1
+; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vmovd %xmm0, %eax
+; AVX-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
+; AVX-NEXT:    retq
+  %c = icmp sgt <8 x i16> %a0, %a1
+  %s = sext <8 x i1> %c to <8 x i16>
+  %1 = shufflevector <8 x i16> %s, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
+  %2 = and <8 x i16> %s, %1
+  %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %4 = and <8 x i16> %2, %3
+  %5 = shufflevector <8 x i16> %4, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %6 = and <8 x i16> %4, %5
+  %7 = extractelement <8 x i16> %6, i32 0
+  ret i16 %7
+}
+
+define i16 @test_v16i16(<16 x i16> %a0, <16 x i16> %a1) {
+; SSE-LABEL: test_v16i16:
+; SSE:       # BB#0:
+; SSE-NEXT:    pcmpgtw %xmm3, %xmm1
+; SSE-NEXT:    pcmpgtw %xmm2, %xmm0
+; SSE-NEXT:    pand %xmm1, %xmm0
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE-NEXT:    pand %xmm0, %xmm1
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; SSE-NEXT:    pand %xmm1, %xmm0
+; SSE-NEXT:    movdqa %xmm0, %xmm1
+; SSE-NEXT:    psrld $16, %xmm1
+; SSE-NEXT:    pand %xmm0, %xmm1
+; SSE-NEXT:    movd %xmm1, %eax
+; SSE-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: test_v16i16:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpcmpgtw %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm1
+; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT:    vmovd %xmm0, %eax
+; AVX1-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: test_v16i16:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpsrld $16, %xmm0, %xmm1
+; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vmovd %xmm0, %eax
+; AVX2-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+  %c = icmp sgt <16 x i16> %a0, %a1
+  %s = sext <16 x i1> %c to <16 x i16>
+  %1 = shufflevector <16 x i16> %s, <16 x i16> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %2 = and <16 x i16> %s, %1
+  %3 = shufflevector <16 x i16> %2, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %4 = and <16 x i16> %2, %3
+  %5 = shufflevector <16 x i16> %4, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %6 = and <16 x i16> %4, %5
+  %7 = shufflevector <16 x i16> %6, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %8 = and <16 x i16> %6, %7
+  %9 = extractelement <16 x i16> %8, i32 0
+  ret i16 %9
+}
+
+define i16 @test_v16i16_legal(<16 x i16> %a0, <16 x i16> %a1) {
+; SSE-LABEL: test_v16i16_legal:
+; SSE:       # BB#0:
+; SSE-NEXT:    pcmpgtw %xmm3, %xmm1
+; SSE-NEXT:    pcmpgtw %xmm2, %xmm0
+; SSE-NEXT:    packsswb %xmm1, %xmm0
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE-NEXT:    pand %xmm0, %xmm1
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; SSE-NEXT:    pand %xmm1, %xmm0
+; SSE-NEXT:    movdqa %xmm0, %xmm1
+; SSE-NEXT:    psrld $16, %xmm1
+; SSE-NEXT:    pand %xmm0, %xmm1
+; SSE-NEXT:    movdqa %xmm1, %xmm0
+; SSE-NEXT:    psrlw $8, %xmm0
+; SSE-NEXT:    pand %xmm1, %xmm0
+; SSE-NEXT:    pextrb $0, %xmm0, %eax
+; SSE-NEXT:    movsbl %al, %eax
+; SSE-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: test_v16i16_legal:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpcmpgtw %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpacksswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm1
+; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
+; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpextrb $0, %xmm0, %eax
+; AVX1-NEXT:    movsbl %al, %eax
+; AVX1-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: test_v16i16_legal:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpsrld $16, %xmm0, %xmm1
+; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm1
+; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpextrb $0, %xmm0, %eax
+; AVX2-NEXT:    movsbl %al, %eax
+; AVX2-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+  %c = icmp sgt <16 x i16> %a0, %a1
+  %s = sext <16 x i1> %c to <16 x i8>
+  %1 = shufflevector <16 x i8> %s, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %2 = and <16 x i8> %s, %1
+  %3 = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %4 = and <16 x i8> %2, %3
+  %5 = shufflevector <16 x i8> %4, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %6 = and <16 x i8> %4, %5
+  %7 = shufflevector <16 x i8> %6, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %8 = and <16 x i8> %6, %7
+  %9 = extractelement <16 x i8> %8, i32 0
+  %10 = sext i8 %9 to i16
+  ret i16 %10
+}
+
+define i8 @test_v16i8(<16 x i8> %a0, <16 x i8> %a1) {
+; SSE-LABEL: test_v16i8:
+; SSE:       # BB#0:
+; SSE-NEXT:    pcmpgtb %xmm1, %xmm0
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE-NEXT:    pand %xmm0, %xmm1
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; SSE-NEXT:    pand %xmm1, %xmm0
+; SSE-NEXT:    movdqa %xmm0, %xmm1
+; SSE-NEXT:    psrld $16, %xmm1
+; SSE-NEXT:    pand %xmm0, %xmm1
+; SSE-NEXT:    movdqa %xmm1, %xmm0
+; SSE-NEXT:    psrlw $8, %xmm0
+; SSE-NEXT:    pand %xmm1, %xmm0
+; SSE-NEXT:    pextrb $0, %xmm0, %eax
+; SSE-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_v16i8:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpsrld $16, %xmm0, %xmm1
+; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpsrlw $8, %xmm0, %xmm1
+; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpextrb $0, %xmm0, %eax
+; AVX-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
+; AVX-NEXT:    retq
+  %c = icmp sgt <16 x i8> %a0, %a1
+  %s = sext <16 x i1> %c to <16 x i8>
+  %1 = shufflevector <16 x i8> %s, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %2 = and <16 x i8> %s, %1
+  %3 = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %4 = and <16 x i8> %2, %3
+  %5 = shufflevector <16 x i8> %4, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %6 = and <16 x i8> %4, %5
+  %7 = shufflevector <16 x i8> %6, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %8 = and <16 x i8> %6, %7
+  %9 = extractelement <16 x i8> %8, i32 0
+  ret i8 %9
+}
+
+define i8 @test_v32i8(<32 x i8> %a0, <32 x i8> %a1) {
+; SSE-LABEL: test_v32i8:
+; SSE:       # BB#0:
+; SSE-NEXT:    pcmpgtb %xmm3, %xmm1
+; SSE-NEXT:    pcmpgtb %xmm2, %xmm0
+; SSE-NEXT:    pand %xmm1, %xmm0
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE-NEXT:    pand %xmm0, %xmm1
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; SSE-NEXT:    pand %xmm1, %xmm0
+; SSE-NEXT:    movdqa %xmm0, %xmm1
+; SSE-NEXT:    psrld $16, %xmm1
+; SSE-NEXT:    pand %xmm0, %xmm1
+; SSE-NEXT:    movdqa %xmm1, %xmm0
+; SSE-NEXT:    psrlw $8, %xmm0
+; SSE-NEXT:    pand %xmm1, %xmm0
+; SSE-NEXT:    pextrb $0, %xmm0, %eax
+; SSE-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: test_v32i8:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpcmpgtb %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm1
+; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
+; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT:    vpextrb $0, %xmm0, %eax
+; AVX1-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: test_v32i8:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpsrld $16, %xmm0, %xmm1
+; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm1
+; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpextrb $0, %xmm0, %eax
+; AVX2-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+  %c = icmp sgt <32 x i8> %a0, %a1
+  %s = sext <32 x i1> %c to <32 x i8>
+  %1 = shufflevector <32 x i8> %s, <32 x i8> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %2 = and <32 x i8> %s, %1
+  %3 = shufflevector <32 x i8> %2, <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %4 = and <32 x i8> %2, %3
+  %5 = shufflevector <32 x i8> %4, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %6 = and <32 x i8> %4, %5
+  %7 = shufflevector <32 x i8> %6, <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %8 = and <32 x i8> %6, %7
+  %9 = shufflevector <32 x i8> %8, <32 x i8> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %10 = and <32 x i8> %8, %9
+  %11 = extractelement <32 x i8> %10, i32 0
+  ret i8 %11
+}
diff --git a/test/CodeGen/X86/vector-compare-any_of.ll b/test/CodeGen/X86/vector-compare-any_of.ll
new file mode 100644
index 00000000000..40966bdfe21
--- /dev/null
+++ b/test/CodeGen/X86/vector-compare-any_of.ll
@@ -0,0 +1,801 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=SSE
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
+
+define i64 @test_v2f64(<2 x double> %a0, <2 x double> %a1) {
+; SSE-LABEL: test_v2f64:
+; SSE:       # BB#0:
+; SSE-NEXT:    cmpltpd %xmm0, %xmm1
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
+; SSE-NEXT:    por %xmm1, %xmm0
+; SSE-NEXT:    movd %xmm0, %rax
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_v2f64:
+; AVX:       # BB#0:
+; AVX-NEXT:    vcmpltpd %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
+; AVX-NEXT:    vorpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vmovq %xmm0, %rax
+; AVX-NEXT:    retq
+  %c = fcmp ogt <2 x double> %a0, %a1
+  %s = sext <2 x i1> %c to <2 x i64>
+  %1 = shufflevector <2 x i64> %s, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
+  %2 = or <2 x i64> %s, %1
+  %3 = extractelement <2 x i64> %2, i32 0
+  ret i64 %3
+}
+
+define i64 @test_v4f64(<4 x double> %a0, <4 x double> %a1) {
+; SSE-LABEL: test_v4f64:
+; SSE:       # BB#0:
+; SSE-NEXT:    cmpltpd %xmm1, %xmm3
+; SSE-NEXT:    cmpltpd %xmm0, %xmm2
+; SSE-NEXT:    orpd %xmm3, %xmm2
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
+; SSE-NEXT:    por %xmm2, %xmm0
+; SSE-NEXT:    movd %xmm0, %rax
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: test_v4f64:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vcmpltpd %ymm0, %ymm1, %ymm0
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vorpd %ymm1, %ymm0, %ymm0
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX1-NEXT:    vorpd %ymm1, %ymm0, %ymm0
+; AVX1-NEXT:    vmovq %xmm0, %rax
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: test_v4f64:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vcmpltpd %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX2-NEXT:    vorpd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vmovq %xmm0, %rax
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+  %c = fcmp ogt <4 x double> %a0, %a1
+  %s = sext <4 x i1> %c to <4 x i64>
+  %1 = shufflevector <4 x i64> %s, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+  %2 = or <4 x i64> %s, %1
+  %3 = shufflevector <4 x i64> %2, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+  %4 = or <4 x i64> %2, %3
+  %5 = extractelement <4 x i64> %4, i64 0
+  ret i64 %5
+}
+
+define i64 @test_v4f64_legal(<4 x double> %a0, <4 x double> %a1) {
+; SSE-LABEL: test_v4f64_legal:
+; SSE:       # BB#0:
+; SSE-NEXT:    cmpltpd %xmm1, %xmm3
+; SSE-NEXT:    cmpltpd %xmm0, %xmm2
+; SSE-NEXT:    packsswb %xmm3, %xmm2
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
+; SSE-NEXT:    por %xmm2, %xmm0
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; SSE-NEXT:    por %xmm0, %xmm1
+; SSE-NEXT:    movd %xmm1, %eax
+; SSE-NEXT:    cltq
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_v4f64_legal:
+; AVX:       # BB#0:
+; AVX-NEXT:    vcmpltpd %ymm0, %ymm1, %ymm0
+; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vmovd %xmm0, %eax
+; AVX-NEXT:    cltq
+; AVX-NEXT:    vzeroupper
+; AVX-NEXT:    retq
+  %c = fcmp ogt <4 x double> %a0, %a1
+  %s = sext <4 x i1> %c to <4 x i32>
+  %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+  %2 = or <4 x i32> %s, %1
+  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+  %4 = or <4 x i32> %2, %3
+  %5 = extractelement <4 x i32> %4, i64 0
+  %6 = sext i32 %5 to i64
+  ret i64 %6
+}
+
+define i32 @test_v4f32(<4 x float> %a0, <4 x float> %a1) {
+; SSE-LABEL: test_v4f32:
+; SSE:       # BB#0:
+; SSE-NEXT:    cmpltps %xmm0, %xmm1
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
+; SSE-NEXT:    por %xmm1, %xmm0
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; SSE-NEXT:    por %xmm0, %xmm1
+; SSE-NEXT:    movd %xmm1, %eax
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_v4f32:
+; AVX:       # BB#0:
+; AVX-NEXT:    vcmpltps %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
+; AVX-NEXT:    vorpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vmovd %xmm0, %eax
+; AVX-NEXT:    retq
+  %c = fcmp ogt <4 x float> %a0, %a1
+  %s = sext <4 x i1> %c to <4 x i32>
+  %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+  %2 = or <4 x i32> %s, %1
+  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+  %4 = or <4 x i32> %2, %3
+  %5 = extractelement <4 x i32> %4, i32 0
+  ret i32 %5
+}
+
+define i32 @test_v8f32(<8 x float> %a0, <8 x float> %a1) {
+; SSE-LABEL: test_v8f32:
+; SSE:       # BB#0:
+; SSE-NEXT:    cmpltps %xmm1, %xmm3
+; SSE-NEXT:    cmpltps %xmm0, %xmm2
+; SSE-NEXT:    orps %xmm3, %xmm2
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
+; SSE-NEXT:    por %xmm2, %xmm0
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; SSE-NEXT:    por %xmm0, %xmm1
+; SSE-NEXT:    movd %xmm1, %eax
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: test_v8f32:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vcmpltps %ymm0, %ymm1, %ymm0
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT:    vmovd %xmm0, %eax
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: test_v8f32:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vcmpltps %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX2-NEXT:    vorps %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vmovd %xmm0, %eax
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+  %c = fcmp ogt <8 x float> %a0, %a1
+  %s = sext <8 x i1> %c to <8 x i32>
+  %1 = shufflevector <8 x i32> %s, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
+  %2 = or <8 x i32> %s, %1
+  %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %4 = or <8 x i32> %2, %3
+  %5 = shufflevector <8 x i32> %4, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %6 = or <8 x i32> %4, %5
+  %7 = extractelement <8 x i32> %6, i32 0
+  ret i32 %7
+}
+
+define i32 @test_v8f32_legal(<8 x float> %a0, <8 x float> %a1) {
+; SSE-LABEL: test_v8f32_legal:
+; SSE:       # BB#0:
+; SSE-NEXT:    cmpltps %xmm1, %xmm3
+; SSE-NEXT:    cmpltps %xmm0, %xmm2
+; SSE-NEXT:    packsswb %xmm3, %xmm2
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
+; SSE-NEXT:    por %xmm2, %xmm0
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; SSE-NEXT:    por %xmm0, %xmm1
+; SSE-NEXT:    movdqa %xmm1, %xmm0
+; SSE-NEXT:    psrld $16, %xmm0
+; SSE-NEXT:    por %xmm1, %xmm0
+; SSE-NEXT:    movd %xmm0, %eax
+; SSE-NEXT:    cwtl
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_v8f32_legal:
+; AVX:       # BB#0:
+; AVX-NEXT:    vcmpltps %ymm0, %ymm1, %ymm0
+; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpsrld $16, %xmm0, %xmm1
+; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vmovd %xmm0, %eax
+; AVX-NEXT:    cwtl
+; AVX-NEXT:    vzeroupper
+; AVX-NEXT:    retq
+  %c = fcmp ogt <8 x float> %a0, %a1
+  %s = sext <8 x i1> %c to <8 x i16>
+  %1 = shufflevector <8 x i16> %s, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
+  %2 = or <8 x i16> %s, %1
+  %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %4 = or <8 x i16> %2, %3
+  %5 = shufflevector <8 x i16> %4, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %6 = or <8 x i16> %4, %5
+  %7 = extractelement <8 x i16> %6, i32 0
+  %8 = sext i16 %7 to i32
+  ret i32 %8
+}
+
+define i64 @test_v2i64(<2 x i64> %a0, <2 x i64> %a1) {
+; SSE-LABEL: test_v2i64:
+; SSE:       # BB#0:
+; SSE-NEXT:    pcmpgtq %xmm1, %xmm0
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE-NEXT:    por %xmm0, %xmm1
+; SSE-NEXT:    movd %xmm1, %rax
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_v2i64:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vmovq %xmm0, %rax
+; AVX-NEXT:    retq
+  %c = icmp sgt <2 x i64> %a0, %a1
+  %s = sext <2 x i1> %c to <2 x i64>
+  %1 = shufflevector <2 x i64> %s, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
+  %2 = or <2 x i64> %s, %1
+  %3 = extractelement <2 x i64> %2, i32 0
+  ret i64 %3
+}
+
+define i64 @test_v4i64(<4 x i64> %a0, <4 x i64> %a1) {
+; SSE-LABEL: test_v4i64:
+; SSE:       # BB#0:
+; SSE-NEXT:    pcmpgtq %xmm3, %xmm1
+; SSE-NEXT:    pcmpgtq %xmm2, %xmm0
+; SSE-NEXT:    por %xmm1, %xmm0
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE-NEXT:    por %xmm0, %xmm1
+; SSE-NEXT:    movd %xmm1, %rax
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: test_v4i64:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    vorps %ymm2, %ymm0, %ymm0
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT:    vmovq %xmm0, %rax
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: test_v4i64:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vmovq %xmm0, %rax
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+  %c = icmp sgt <4 x i64> %a0, %a1
+  %s = sext <4 x i1> %c to <4 x i64>
+  %1 = shufflevector <4 x i64> %s, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+  %2 = or <4 x i64> %s, %1
+  %3 = shufflevector <4 x i64> %2, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+  %4 = or <4 x i64> %2, %3
+  %5 = extractelement <4 x i64> %4, i64 0
+  ret i64 %5
+}
+
+define i64 @test_v4i64_legal(<4 x i64> %a0, <4 x i64> %a1) {
+; SSE-LABEL: test_v4i64_legal:
+; SSE:       # BB#0:
+; SSE-NEXT:    pcmpgtq %xmm3, %xmm1
+; SSE-NEXT:    pcmpgtq %xmm2, %xmm0
+; SSE-NEXT:    packsswb %xmm1, %xmm0
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE-NEXT:    por %xmm0, %xmm1
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; SSE-NEXT:    por %xmm1, %xmm0
+; SSE-NEXT:    movd %xmm0, %eax
+; SSE-NEXT:    cltq
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: test_v4i64_legal:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpacksswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vmovd %xmm0, %eax
+; AVX1-NEXT:    cltq
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: test_v4i64_legal:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vmovd %xmm0, %eax
+; AVX2-NEXT:    cltq
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+  %c = icmp sgt <4 x i64> %a0, %a1
+  %s = sext <4 x i1> %c to <4 x i32>
+  %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+  %2 = or <4 x i32> %s, %1
+  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+  %4 = or <4 x i32> %2, %3
+  %5 = extractelement <4 x i32> %4, i64 0
+  %6 = sext i32 %5 to i64
+  ret i64 %6
+}
+
+define i32 @test_v4i32(<4 x i32> %a0, <4 x i32> %a1) {
+; SSE-LABEL: test_v4i32:
+; SSE:       # BB#0:
+; SSE-NEXT:    pcmpgtd %xmm1, %xmm0
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE-NEXT:    por %xmm0, %xmm1
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; SSE-NEXT:    por %xmm1, %xmm0
+; SSE-NEXT:    movd %xmm0, %eax
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_v4i32:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vmovd %xmm0, %eax
+; AVX-NEXT:    retq
+  %c = icmp sgt <4 x i32> %a0, %a1
+  %s = sext <4 x i1> %c to <4 x i32>
+  %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+  %2 = or <4 x i32> %s, %1
+  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+  %4 = or <4 x i32> %2, %3
+  %5 = extractelement <4 x i32> %4, i32 0
+  ret i32 %5
+}
+
+define i32 @test_v8i32(<8 x i32> %a0, <8 x i32> %a1) {
+; SSE-LABEL: test_v8i32:
+; SSE:       # BB#0:
+; SSE-NEXT:    pcmpgtd %xmm3, %xmm1
+; SSE-NEXT:    pcmpgtd %xmm2, %xmm0
+; SSE-NEXT:    por %xmm1, %xmm0
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE-NEXT:    por %xmm0, %xmm1
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; SSE-NEXT:    por %xmm1, %xmm0
+; SSE-NEXT:    movd %xmm0, %eax
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: test_v8i32:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    vorps %ymm2, %ymm0, %ymm0
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT:    vmovd %xmm0, %eax
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: test_v8i32:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vmovd %xmm0, %eax
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+  %c = icmp sgt <8 x i32> %a0, %a1
+  %s = sext <8 x i1> %c to <8 x i32>
+  %1 = shufflevector <8 x i32> %s, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
+  %2 = or <8 x i32> %s, %1
+  %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %4 = or <8 x i32> %2, %3
+  %5 = shufflevector <8 x i32> %4, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %6 = or <8 x i32> %4, %5
+  %7 = extractelement <8 x i32> %6, i32 0
+  ret i32 %7
+}
+
+define i32 @test_v8i32_legal(<8 x i32> %a0, <8 x i32> %a1) {
+; SSE-LABEL: test_v8i32_legal:
+; SSE:       # BB#0:
+; SSE-NEXT:    pcmpgtd %xmm3, %xmm1
+; SSE-NEXT:    pcmpgtd %xmm2, %xmm0
+; SSE-NEXT:    packsswb %xmm1, %xmm0
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE-NEXT:    por %xmm0, %xmm1
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; SSE-NEXT:    por %xmm1, %xmm0
+; SSE-NEXT:    movdqa %xmm0, %xmm1
+; SSE-NEXT:    psrld $16, %xmm1
+; SSE-NEXT:    por %xmm0, %xmm1
+; SSE-NEXT:    movd %xmm1, %eax
+; SSE-NEXT:    cwtl
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: test_v8i32_legal:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpacksswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm1
+; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vmovd %xmm0, %eax
+; AVX1-NEXT:    cwtl
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: test_v8i32_legal:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpsrld $16, %xmm0, %xmm1
+; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vmovd %xmm0, %eax
+; AVX2-NEXT:    cwtl
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+  %c = icmp sgt <8 x i32> %a0, %a1
+  %s = sext <8 x i1> %c to <8 x i16>
+  %1 = shufflevector <8 x i16> %s, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
+  %2 = or <8 x i16> %s, %1
+  %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %4 = or <8 x i16> %2, %3
+  %5 = shufflevector <8 x i16> %4, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %6 = or <8 x i16> %4, %5
+  %7 = extractelement <8 x i16> %6, i32 0
+  %8 = sext i16 %7 to i32
+  ret i32 %8
+}
+
+define i16 @test_v8i16(<8 x i16> %a0, <8 x i16> %a1) {
+; SSE-LABEL: test_v8i16:
+; SSE:       # BB#0:
+; SSE-NEXT:    pcmpgtw %xmm1, %xmm0
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE-NEXT:    por %xmm0, %xmm1
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; SSE-NEXT:    por %xmm1, %xmm0
+; SSE-NEXT:    movdqa %xmm0, %xmm1
+; SSE-NEXT:    psrld $16, %xmm1
+; SSE-NEXT:    por %xmm0, %xmm1
+; SSE-NEXT:    movd %xmm1, %eax
+; SSE-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_v8i16:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpsrld $16, %xmm0, %xmm1
+; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vmovd %xmm0, %eax
+; AVX-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
+; AVX-NEXT:    retq
+  %c = icmp sgt <8 x i16> %a0, %a1
+  %s = sext <8 x i1> %c to <8 x i16>
+  %1 = shufflevector <8 x i16> %s, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
+  %2 = or <8 x i16> %s, %1
+  %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %4 = or <8 x i16> %2, %3
+  %5 = shufflevector <8 x i16> %4, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %6 = or <8 x i16> %4, %5
+  %7 = extractelement <8 x i16> %6, i32 0
+  ret i16 %7
+}
+
+define i16 @test_v16i16(<16 x i16> %a0, <16 x i16> %a1) {
+; SSE-LABEL: test_v16i16:
+; SSE:       # BB#0:
+; SSE-NEXT:    pcmpgtw %xmm3, %xmm1
+; SSE-NEXT:    pcmpgtw %xmm2, %xmm0
+; SSE-NEXT:    por %xmm1, %xmm0
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE-NEXT:    por %xmm0, %xmm1
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; SSE-NEXT:    por %xmm1, %xmm0
+; SSE-NEXT:    movdqa %xmm0, %xmm1
+; SSE-NEXT:    psrld $16, %xmm1
+; SSE-NEXT:    por %xmm0, %xmm1
+; SSE-NEXT:    movd %xmm1, %eax
+; SSE-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: test_v16i16:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpcmpgtw %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    vorps %ymm2, %ymm0, %ymm0
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm1
+; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT:    vmovd %xmm0, %eax
+; AVX1-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: test_v16i16:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpsrld $16, %xmm0, %xmm1
+; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vmovd %xmm0, %eax
+; AVX2-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+  %c = icmp sgt <16 x i16> %a0, %a1
+  %s = sext <16 x i1> %c to <16 x i16>
+  %1 = shufflevector <16 x i16> %s, <16 x i16> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %2 = or <16 x i16> %s, %1
+  %3 = shufflevector <16 x i16> %2, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %4 = or <16 x i16> %2, %3
+  %5 = shufflevector <16 x i16> %4, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %6 = or <16 x i16> %4, %5
+  %7 = shufflevector <16 x i16> %6, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %8 = or <16 x i16> %6, %7
+  %9 = extractelement <16 x i16> %8, i32 0
+  ret i16 %9
+}
+
+define i16 @test_v16i16_legal(<16 x i16> %a0, <16 x i16> %a1) {
+; SSE-LABEL: test_v16i16_legal:
+; SSE:       # BB#0:
+; SSE-NEXT:    pcmpgtw %xmm3, %xmm1
+; SSE-NEXT:    pcmpgtw %xmm2, %xmm0
+; SSE-NEXT:    packsswb %xmm1, %xmm0
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE-NEXT:    por %xmm0, %xmm1
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; SSE-NEXT:    por %xmm1, %xmm0
+; SSE-NEXT:    movdqa %xmm0, %xmm1
+; SSE-NEXT:    psrld $16, %xmm1
+; SSE-NEXT:    por %xmm0, %xmm1
+; SSE-NEXT:    movdqa %xmm1, %xmm0
+; SSE-NEXT:    psrlw $8, %xmm0
+; SSE-NEXT:    por %xmm1, %xmm0
+; SSE-NEXT:    pextrb $0, %xmm0, %eax
+; SSE-NEXT:    movsbl %al, %eax
+; SSE-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: test_v16i16_legal:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpcmpgtw %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpacksswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm1
+; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
+; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpextrb $0, %xmm0, %eax
+; AVX1-NEXT:    movsbl %al, %eax
+; AVX1-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: test_v16i16_legal:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpsrld $16, %xmm0, %xmm1
+; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm1
+; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpextrb $0, %xmm0, %eax
+; AVX2-NEXT:    movsbl %al, %eax
+; AVX2-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+  %c = icmp sgt <16 x i16> %a0, %a1
+  %s = sext <16 x i1> %c to <16 x i8>
+  %1 = shufflevector <16 x i8> %s, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %2 = or <16 x i8> %s, %1
+  %3 = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %4 = or <16 x i8> %2, %3
+  %5 = shufflevector <16 x i8> %4, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %6 = or <16 x i8> %4, %5
+  %7 = shufflevector <16 x i8> %6, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %8 = or <16 x i8> %6, %7
+  %9 = extractelement <16 x i8> %8, i32 0
+  %10 = sext i8 %9 to i16
+  ret i16 %10
+}
+
+define i8 @test_v16i8(<16 x i8> %a0, <16 x i8> %a1) {
+; SSE-LABEL: test_v16i8:
+; SSE:       # BB#0:
+; SSE-NEXT:    pcmpgtb %xmm1, %xmm0
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE-NEXT:    por %xmm0, %xmm1
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; SSE-NEXT:    por %xmm1, %xmm0
+; SSE-NEXT:    movdqa %xmm0, %xmm1
+; SSE-NEXT:    psrld $16, %xmm1
+; SSE-NEXT:    por %xmm0, %xmm1
+; SSE-NEXT:    movdqa %xmm1, %xmm0
+; SSE-NEXT:    psrlw $8, %xmm0
+; SSE-NEXT:    por %xmm1, %xmm0
+; SSE-NEXT:    pextrb $0, %xmm0, %eax
+; SSE-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_v16i8:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpsrld $16, %xmm0, %xmm1
+; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpsrlw $8, %xmm0, %xmm1
+; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpextrb $0, %xmm0, %eax
+; AVX-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
+; AVX-NEXT:    retq
+  %c = icmp sgt <16 x i8> %a0, %a1
+  %s = sext <16 x i1> %c to <16 x i8>
+  %1 = shufflevector <16 x i8> %s, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %2 = or <16 x i8> %s, %1
+  %3 = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %4 = or <16 x i8> %2, %3
+  %5 = shufflevector <16 x i8> %4, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %6 = or <16 x i8> %4, %5
+  %7 = shufflevector <16 x i8> %6, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %8 = or <16 x i8> %6, %7
+  %9 = extractelement <16 x i8> %8, i32 0
+  ret i8 %9
+}
+
+define i8 @test_v32i8(<32 x i8> %a0, <32 x i8> %a1) {
+; SSE-LABEL: test_v32i8:
+; SSE:       # BB#0:
+; SSE-NEXT:    pcmpgtb %xmm3, %xmm1
+; SSE-NEXT:    pcmpgtb %xmm2, %xmm0
+; SSE-NEXT:    por %xmm1, %xmm0
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE-NEXT:    por %xmm0, %xmm1
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; SSE-NEXT:    por %xmm1, %xmm0
+; SSE-NEXT:    movdqa %xmm0, %xmm1
+; SSE-NEXT:    psrld $16, %xmm1
+; SSE-NEXT:    por %xmm0, %xmm1
+; SSE-NEXT:    movdqa %xmm1, %xmm0
+; SSE-NEXT:    psrlw $8, %xmm0
+; SSE-NEXT:    por %xmm1, %xmm0
+; SSE-NEXT:    pextrb $0, %xmm0, %eax
+; SSE-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: test_v32i8:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpcmpgtb %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    vorps %ymm2, %ymm0, %ymm0
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm1
+; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
+; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT:    vpextrb $0, %xmm0, %eax
+; AVX1-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: test_v32i8:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpsrld $16, %xmm0, %xmm1
+; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm1
+; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpextrb $0, %xmm0, %eax
+; AVX2-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+  %c = icmp sgt <32 x i8> %a0, %a1
+  %s = sext <32 x i1> %c to <32 x i8>
+  %1 = shufflevector <32 x i8> %s, <32 x i8> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %2 = or <32 x i8> %s, %1
+  %3 = shufflevector <32 x i8> %2, <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %4 = or <32 x i8> %2, %3
+  %5 = shufflevector <32 x i8> %4, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %6 = or <32 x i8> %4, %5
+  %7 = shufflevector <32 x i8> %6, <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %8 = or <32 x i8> %6, %7
+  %9 = shufflevector <32 x i8> %8, <32 x i8> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %10 = or <32 x i8> %8, %9
+  %11 = extractelement <32 x i8> %10, i32 0
+  ret i8 %11
+}
-- 
2.40.0