From: Sanjay Patel
Date: Tue, 7 Feb 2017 00:10:50 +0000 (+0000)
Subject: [x86] add tests to show current codegen for vblendv*; NFC
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=bcca9da4f8535cd7db594c3c25775987d203ec93;p=llvm

[x86] add tests to show current codegen for vblendv*; NFC

As noted in the comments, we should be able to eliminate cmp ops in several cases.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@294263 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/test/CodeGen/X86/vselect-pcmp.ll b/test/CodeGen/X86/vselect-pcmp.ll
new file mode 100644
index 00000000000..4b39503e85a
--- /dev/null
+++ b/test/CodeGen/X86/vselect-pcmp.ll
@@ -0,0 +1,319 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX512F
+
+; The condition vector for BLENDV* only cares about the sign bit of each element.
+; So in these tests, if we generate BLENDV*, we should be able to remove the redundant cmp op.
+
+; Test 128-bit vectors for all legal element types.
+
+define <16 x i8> @signbit_sel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %mask) {
+; AVX-LABEL: signbit_sel_v16i8:
+; AVX: # BB#0:
+; AVX-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX-NEXT: vpcmpgtb %xmm2, %xmm3, %xmm2
+; AVX-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
+; AVX-NEXT: retq
+ %tr = icmp slt <16 x i8> %mask, zeroinitializer
+ %z = select <16 x i1> %tr, <16 x i8> %x, <16 x i8> %y
+ ret <16 x i8> %z
+}
+
+; Sorry 16-bit, you're not important enough to support?
+
+define <8 x i16> @signbit_sel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %mask) {
+; AVX-LABEL: signbit_sel_v8i16:
+; AVX: # BB#0:
+; AVX-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2
+; AVX-NEXT: vpandn %xmm1, %xmm2, %xmm1
+; AVX-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %tr = icmp slt <8 x i16> %mask, zeroinitializer
+ %z = select <8 x i1> %tr, <8 x i16> %x, <8 x i16> %y
+ ret <8 x i16> %z
+}
+
+define <4 x i32> @signbit_sel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) {
+; AVX-LABEL: signbit_sel_v4i32:
+; AVX: # BB#0:
+; AVX-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2
+; AVX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
+; AVX-NEXT: retq
+ %tr = icmp slt <4 x i32> %mask, zeroinitializer
+ %z = select <4 x i1> %tr, <4 x i32> %x, <4 x i32> %y
+ ret <4 x i32> %z
+}
+
+define <2 x i64> @signbit_sel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %mask) {
+; AVX-LABEL: signbit_sel_v2i64:
+; AVX: # BB#0:
+; AVX-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
+; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX-NEXT: retq
+ %tr = icmp slt <2 x i64> %mask, zeroinitializer
+ %z = select <2 x i1> %tr, <2 x i64> %x, <2 x i64> %y
+ ret <2 x i64> %z
+}
+
+define <4 x float> @signbit_sel_v4f32(<4 x float> %x, <4 x float> %y, <4 x i32> %mask) {
+; AVX-LABEL: signbit_sel_v4f32:
+; AVX: # BB#0:
+; AVX-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2
+; AVX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
+; AVX-NEXT: retq
+ %tr = icmp slt <4 x i32> %mask, zeroinitializer
+ %z = select <4 x i1> %tr, <4 x float> %x, <4 x float> %y
+ ret <4 x float> %z
+}
+
+define <2 x double> @signbit_sel_v2f64(<2 x double> %x, <2 x double> %y, <2 x i64> %mask) {
+; AVX-LABEL: signbit_sel_v2f64:
+; AVX: # BB#0:
+; AVX-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
+; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX-NEXT: retq
+ %tr = icmp slt <2 x i64> %mask, zeroinitializer
+ %z = select <2 x i1> %tr, <2 x double> %x, <2 x double> %y
+ ret <2 x double> %z
+}
+
+; Test 256-bit vectors to see differences between AVX1 and AVX2.
+
+define <32 x i8> @signbit_sel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %mask) {
+; AVX1-LABEL: signbit_sel_v32i8:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
+; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; AVX1-NEXT: vpcmpgtb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpcmpgtb %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
+; AVX1-NEXT: vandnps %ymm1, %ymm2, %ymm1
+; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
+; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: signbit_sel_v32i8:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpxor %ymm3, %ymm3, %ymm3
+; AVX2-NEXT: vpcmpgtb %ymm2, %ymm3, %ymm2
+; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: signbit_sel_v32i8:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vpxor %ymm3, %ymm3, %ymm3
+; AVX512F-NEXT: vpcmpgtb %ymm2, %ymm3, %ymm2
+; AVX512F-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
+; AVX512F-NEXT: retq
+ %tr = icmp slt <32 x i8> %mask, zeroinitializer
+ %z = select <32 x i1> %tr, <32 x i8> %x, <32 x i8> %y
+ ret <32 x i8> %z
+}
+
+; Sorry 16-bit, you'll never be important enough to support?
+
+define <16 x i16> @signbit_sel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %mask) {
+; AVX1-LABEL: signbit_sel_v16i16:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
+; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; AVX1-NEXT: vpcmpgtw %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpcmpgtw %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
+; AVX1-NEXT: vandnps %ymm1, %ymm2, %ymm1
+; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
+; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: signbit_sel_v16i16:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpxor %ymm3, %ymm3, %ymm3
+; AVX2-NEXT: vpcmpgtw %ymm2, %ymm3, %ymm2
+; AVX2-NEXT: vpandn %ymm1, %ymm2, %ymm1
+; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: signbit_sel_v16i16:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vpxor %ymm3, %ymm3, %ymm3
+; AVX512F-NEXT: vpcmpgtw %ymm2, %ymm3, %ymm2
+; AVX512F-NEXT: vpandn %ymm1, %ymm2, %ymm1
+; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm0
+; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: retq
+ %tr = icmp slt <16 x i16> %mask, zeroinitializer
+ %z = select <16 x i1> %tr, <16 x i16> %x, <16 x i16> %y
+ ret <16 x i16> %z
+}
+
+define <8 x i32> @signbit_sel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> %mask) {
+; AVX1-LABEL: signbit_sel_v8i32:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
+; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; AVX1-NEXT: vpcmpgtd %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpcmpgtd %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
+; AVX1-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: signbit_sel_v8i32:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpxor %ymm3, %ymm3, %ymm3
+; AVX2-NEXT: vpcmpgtd %ymm2, %ymm3, %ymm2
+; AVX2-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: signbit_sel_v8i32:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: # kill: %YMM2 %YMM2 %ZMM2
+; AVX512F-NEXT: # kill: %YMM1 %YMM1 %ZMM1
+; AVX512F-NEXT: # kill: %YMM0 %YMM0 %ZMM0
+; AVX512F-NEXT: vpxor %ymm3, %ymm3, %ymm3
+; AVX512F-NEXT: vpcmpgtd %zmm2, %zmm3, %k1
+; AVX512F-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
+; AVX512F-NEXT: # kill: %YMM0 %YMM0 %ZMM0
+; AVX512F-NEXT: retq
+ %tr = icmp slt <8 x i32> %mask, zeroinitializer
+ %z = select <8 x i1> %tr, <8 x i32> %x, <8 x i32> %y
+ ret <8 x i32> %z
+}
+
+define <4 x i64> @signbit_sel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %mask) {
+; AVX1-LABEL: signbit_sel_v4i64:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
+; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
+; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: signbit_sel_v4i64:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpxor %ymm3, %ymm3, %ymm3
+; AVX2-NEXT: vpcmpgtq %ymm2, %ymm3, %ymm2
+; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: signbit_sel_v4i64:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vpxor %ymm3, %ymm3, %ymm3
+; AVX512F-NEXT: vpcmpgtq %ymm2, %ymm3, %ymm2
+; AVX512F-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX512F-NEXT: retq
+ %tr = icmp slt <4 x i64> %mask, zeroinitializer
+ %z = select <4 x i1> %tr, <4 x i64> %x, <4 x i64> %y
+ ret <4 x i64> %z
+}
+
+define <4 x double> @signbit_sel_v4f64(<4 x double> %x, <4 x double> %y, <4 x i64> %mask) {
+; AVX1-LABEL: signbit_sel_v4f64:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
+; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
+; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: signbit_sel_v4f64:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpxor %ymm3, %ymm3, %ymm3
+; AVX2-NEXT: vpcmpgtq %ymm2, %ymm3, %ymm2
+; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: signbit_sel_v4f64:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vpxor %ymm3, %ymm3, %ymm3
+; AVX512F-NEXT: vpcmpgtq %ymm2, %ymm3, %ymm2
+; AVX512F-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX512F-NEXT: retq
+ %tr = icmp slt <4 x i64> %mask, zeroinitializer
+ %z = select <4 x i1> %tr, <4 x double> %x, <4 x double> %y
+ ret <4 x double> %z
+}
+
+; Try a condition with a different type than the select operands.
+
+define <4 x double> @signbit_sel_v4f64_small_mask(<4 x double> %x, <4 x double> %y, <4 x i32> %mask) {
+; AVX1-LABEL: signbit_sel_v4f64_small_mask:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpmovsxdq %xmm2, %xmm3
+; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
+; AVX1-NEXT: vpmovsxdq %xmm2, %xmm2
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
+; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: signbit_sel_v4f64_small_mask:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX2-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2
+; AVX2-NEXT: vpmovsxdq %xmm2, %ymm2
+; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: signbit_sel_v4f64_small_mask:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX512F-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2
+; AVX512F-NEXT: vpmovsxdq %xmm2, %ymm2
+; AVX512F-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX512F-NEXT: retq
+ %tr = icmp slt <4 x i32> %mask, zeroinitializer
+ %z = select <4 x i1> %tr, <4 x double> %x, <4 x double> %y
+ ret <4 x double> %z
+}
+
+; Try a 512-bit vector to make sure AVX-512 is handled as expected.
+
+define <8 x double> @signbit_sel_v8f64(<8 x double> %x, <8 x double> %y, <8 x i64> %mask) {
+; AVX1-LABEL: signbit_sel_v8f64:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm6
+; AVX1-NEXT: vpxor %xmm7, %xmm7, %xmm7
+; AVX1-NEXT: vpcmpgtq %xmm6, %xmm7, %xmm6
+; AVX1-NEXT: vpcmpgtq %xmm5, %xmm7, %xmm5
+; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm5, %ymm5
+; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm6
+; AVX1-NEXT: vpcmpgtq %xmm6, %xmm7, %xmm6
+; AVX1-NEXT: vpcmpgtq %xmm4, %xmm7, %xmm4
+; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm4, %ymm4
+; AVX1-NEXT: vblendvpd %ymm4, %ymm0, %ymm2, %ymm0
+; AVX1-NEXT: vblendvpd %ymm5, %ymm1, %ymm3, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: signbit_sel_v8f64:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpxor %ymm6, %ymm6, %ymm6
+; AVX2-NEXT: vpcmpgtq %ymm5, %ymm6, %ymm5
+; AVX2-NEXT: vpcmpgtq %ymm4, %ymm6, %ymm4
+; AVX2-NEXT: vblendvpd %ymm4, %ymm0, %ymm2, %ymm0
+; AVX2-NEXT: vblendvpd %ymm5, %ymm1, %ymm3, %ymm1
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: signbit_sel_v8f64:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vpxord %zmm3, %zmm3, %zmm3
+; AVX512F-NEXT: vpcmpgtq %zmm2, %zmm3, %k1
+; AVX512F-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
+; AVX512F-NEXT: retq
+ %tr = icmp slt <8 x i64> %mask, zeroinitializer
+ %z = select <8 x i1> %tr, <8 x double> %x, <8 x double> %y
+ ret <8 x double> %z
+}
+
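For reference, here is a sketch (illustration only, not part of the committed test) of the codegen the comments above anticipate once the redundant cmp is folded away. BLENDV* reads only the sign bit of each condition element, and a pcmpgt-against-zero sets all bits of an element exactly when that element's sign bit is already set, so the incoming mask could feed the blend directly. For @signbit_sel_v4f32, that would reduce the body to something like:

; hypothetical codegen for @signbit_sel_v4f32 after eliminating the compare:
;   vblendvps %xmm2, %xmm0, %xmm1, %xmm0  # %xmm2 holds the raw %mask argument
;   retq

The 16-bit cases are the known exception: there is no vblendvw, and vpblendvb selects per byte, so the raw i16 mask could not feed a blend directly without first broadcasting the sign bit within each element (which is what the compare already does).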