From 625d4a7a0d6c94642f974ff6413b6485f6c3237f Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Mon, 17 Dec 2018 23:20:14 +0000
Subject: [PATCH] [X86] Add baseline tests for D55780

This adds tests for (add (umax X, C), -C) as part of fixing PR40053

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@349416 91177308-0d34-0410-b5e6-96231b3b80d8
---
Reviewer note (not part of the commit; placed after the "---" marker so it
adds no lines to the hunk):
 * This copy of the patch had been flattened onto a handful of very long
   lines. The header, diffstat and hunk are split back into one record per
   line. The hunk again holds 3 context lines and 522 added lines, which
   agrees with "@@ -2411,3 +2411,525 @@" and with the diffstat.
 * The constant vector operands of every icmp/select/add below are absent in
   this copy (each such instruction stops right after a type or a comma), so
   the added IR does not parse as-is. They must be restored from the upstream
   revision named in git-svn-id before applying; they are NOT guessed here.
 * The author address on the "From:" line is likewise absent in this copy.
 * Whitespace inside lines had already been collapsed. IR instructions are
   re-indented by two spaces; CHECK lines keep single spaces. NOTE(review):
   the resulting blob may therefore not hash to 9320eed534d - confirm.

 test/CodeGen/X86/psubus.ll | 522 +++++++++++++++++++++++++++++++++++++
 1 file changed, 522 insertions(+)

diff --git a/test/CodeGen/X86/psubus.ll b/test/CodeGen/X86/psubus.ll
index 2447c563fcf..9320eed534d 100644
--- a/test/CodeGen/X86/psubus.ll
+++ b/test/CodeGen/X86/psubus.ll
@@ -2411,3 +2411,525 @@ define void @subus_v2i16(<2 x i16>* %p1, <2 x i16>* %p2) {
   ret void
 }
 
+define <16 x i8> @test19(<16 x i8> %x) {
+; SSE-LABEL: test19:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: pmaxub {{.*}}(%rip), %xmm0
+; SSE-NEXT: paddb {{.*}}(%rip), %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test19:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vpmaxub {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vpaddb {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+  %0 = icmp ugt <16 x i8> %x,
+  %1 = select <16 x i1> %0, <16 x i8> %x, <16 x i8>
+  %2 = add <16 x i8> %1,
+  ret <16 x i8> %2
+}
+
+define <16 x i8> @test20(<16 x i8> %x) {
+; SSE-LABEL: test20:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: pmaxub {{.*}}(%rip), %xmm0
+; SSE-NEXT: paddb {{.*}}(%rip), %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test20:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vpmaxub {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vpaddb {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+  %0 = icmp ugt <16 x i8> %x,
+  %1 = select <16 x i1> %0, <16 x i8> %x, <16 x i8>
+  %2 = add <16 x i8> %1,
+  ret <16 x i8> %2
+}
+
+define <8 x i16> @test21(<8 x i16> %x) {
+; SSE2-LABEL: test21:
+; SSE2: # %bb.0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: pmaxsw {{.*}}(%rip), %xmm0
+; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: paddw {{.*}}(%rip), %xmm0
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: test21:
+; SSSE3: # %bb.0: # %entry
+; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; SSSE3-NEXT: pxor %xmm1, %xmm0
+; SSSE3-NEXT: pmaxsw {{.*}}(%rip), %xmm0
+; SSSE3-NEXT: pxor %xmm1, %xmm0
+; SSSE3-NEXT: paddw {{.*}}(%rip), %xmm0
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: test21:
+; SSE41: # %bb.0: # %entry
+; SSE41-NEXT: pmaxuw {{.*}}(%rip), %xmm0
+; SSE41-NEXT: paddw {{.*}}(%rip), %xmm0
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: test21:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vpmaxuw {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vpaddw {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+  %0 = icmp ugt <8 x i16> %x,
+  %1 = select <8 x i1> %0, <8 x i16> %x, <8 x i16>
+  %2 = add <8 x i16> %1,
+  ret <8 x i16> %2
+}
+
+define <8 x i16> @test22(<8 x i16> %x) {
+; SSE2-LABEL: test22:
+; SSE2: # %bb.0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: pmaxsw {{.*}}(%rip), %xmm0
+; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: paddw {{.*}}(%rip), %xmm0
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: test22:
+; SSSE3: # %bb.0: # %entry
+; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; SSSE3-NEXT: pxor %xmm1, %xmm0
+; SSSE3-NEXT: pmaxsw {{.*}}(%rip), %xmm0
+; SSSE3-NEXT: pxor %xmm1, %xmm0
+; SSSE3-NEXT: paddw {{.*}}(%rip), %xmm0
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: test22:
+; SSE41: # %bb.0: # %entry
+; SSE41-NEXT: pmaxuw {{.*}}(%rip), %xmm0
+; SSE41-NEXT: paddw {{.*}}(%rip), %xmm0
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: test22:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vpmaxuw {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vpaddw {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+  %0 = icmp ugt <8 x i16> %x,
+  %1 = select <8 x i1> %0, <8 x i16> %x, <8 x i16>
+  %2 = add <8 x i16> %1,
+  ret <8 x i16> %2
+}
+
+define <32 x i8> @test23(<32 x i8> %x) {
+; SSE-LABEL: test23:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: movdqa {{.*#+}} xmm2 = [70,70,70,70,70,70,70,70,70,70,70,70,70,70,70,70]
+; SSE-NEXT: pmaxub %xmm2, %xmm1
+; SSE-NEXT: pmaxub %xmm2, %xmm0
+; SSE-NEXT: movdqa {{.*#+}} xmm2 = [186,186,186,186,186,186,186,186,186,186,186,186,186,186,186,186]
+; SSE-NEXT: paddb %xmm2, %xmm0
+; SSE-NEXT: paddb %xmm2, %xmm1
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: test23:
+; AVX1: # %bb.0: # %entry
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [70,70,70,70,70,70,70,70,70,70,70,70,70,70,70,70]
+; AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [186,186,186,186,186,186,186,186,186,186,186,186,186,186,186,186]
+; AVX1-NEXT: vpaddb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpaddb %xmm1, %xmm2, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test23:
+; AVX2: # %bb.0: # %entry
+; AVX2-NEXT: vpmaxub {{.*}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT: vpaddb {{.*}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: test23:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vpmaxub {{.*}}(%rip), %ymm0, %ymm0
+; AVX512-NEXT: vpaddb {{.*}}(%rip), %ymm0, %ymm0
+; AVX512-NEXT: retq
+entry:
+  %0 = icmp ugt <32 x i8> %x,
+  %1 = select <32 x i1> %0, <32 x i8> %x, <32 x i8>
+  %2 = add <32 x i8> %1,
+  ret <32 x i8> %2
+}
+
+define <32 x i8> @test24(<32 x i8> %x) {
+; SSE-LABEL: test24:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: pmaxub {{.*}}(%rip), %xmm1
+; SSE-NEXT: pmaxub {{.*}}(%rip), %xmm0
+; SSE-NEXT: paddb {{.*}}(%rip), %xmm0
+; SSE-NEXT: paddb {{.*}}(%rip), %xmm1
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: test24:
+; AVX1: # %bb.0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpmaxub {{.*}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vpmaxub {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vpaddb {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vpaddb {{.*}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test24:
+; AVX2: # %bb.0: # %entry
+; AVX2-NEXT: vpmaxub {{.*}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT: vpaddb {{.*}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: test24:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vpmaxub {{.*}}(%rip), %ymm0, %ymm0
+; AVX512-NEXT: vpaddb {{.*}}(%rip), %ymm0, %ymm0
+; AVX512-NEXT: retq
+entry:
+  %0 = icmp ugt <32 x i8> %x,
+  %1 = select <32 x i1> %0, <32 x i8> %x, <32 x i8>
+  %2 = add <32 x i8> %1,
+  ret <32 x i8> %2
+}
+
+define <16 x i16> @test25(<16 x i16> %x) {
+; SSE2-LABEL: test25:
+; SSE2: # %bb.0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; SSE2-NEXT: pxor %xmm2, %xmm1
+; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [37768,37768,37768,37768,37768,37768,37768,37768]
+; SSE2-NEXT: pmaxsw %xmm3, %xmm1
+; SSE2-NEXT: pxor %xmm2, %xmm1
+; SSE2-NEXT: pxor %xmm2, %xmm0
+; SSE2-NEXT: pmaxsw %xmm3, %xmm0
+; SSE2-NEXT: pxor %xmm2, %xmm0
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [60536,60536,60536,60536,60536,60536,60536,60536]
+; SSE2-NEXT: paddw %xmm2, %xmm0
+; SSE2-NEXT: paddw %xmm2, %xmm1
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: test25:
+; SSSE3: # %bb.0: # %entry
+; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; SSSE3-NEXT: pxor %xmm2, %xmm1
+; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [37768,37768,37768,37768,37768,37768,37768,37768]
+; SSSE3-NEXT: pmaxsw %xmm3, %xmm1
+; SSSE3-NEXT: pxor %xmm2, %xmm1
+; SSSE3-NEXT: pxor %xmm2, %xmm0
+; SSSE3-NEXT: pmaxsw %xmm3, %xmm0
+; SSSE3-NEXT: pxor %xmm2, %xmm0
+; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [60536,60536,60536,60536,60536,60536,60536,60536]
+; SSSE3-NEXT: paddw %xmm2, %xmm0
+; SSSE3-NEXT: paddw %xmm2, %xmm1
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: test25:
+; SSE41: # %bb.0: # %entry
+; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [5000,5000,5000,5000,5000,5000,5000,5000]
+; SSE41-NEXT: pmaxuw %xmm2, %xmm1
+; SSE41-NEXT: pmaxuw %xmm2, %xmm0
+; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [60536,60536,60536,60536,60536,60536,60536,60536]
+; SSE41-NEXT: paddw %xmm2, %xmm0
+; SSE41-NEXT: paddw %xmm2, %xmm1
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: test25:
+; AVX1: # %bb.0: # %entry
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [5000,5000,5000,5000,5000,5000,5000,5000]
+; AVX1-NEXT: vpmaxuw %xmm1, %xmm0, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [60536,60536,60536,60536,60536,60536,60536,60536]
+; AVX1-NEXT: vpaddw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpaddw %xmm1, %xmm2, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test25:
+; AVX2: # %bb.0: # %entry
+; AVX2-NEXT: vpmaxuw {{.*}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT: vpaddw {{.*}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: test25:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vpmaxuw {{.*}}(%rip), %ymm0, %ymm0
+; AVX512-NEXT: vpaddw {{.*}}(%rip), %ymm0, %ymm0
+; AVX512-NEXT: retq
+entry:
+  %0 = icmp ugt <16 x i16> %x,
+  %1 = select <16 x i1> %0, <16 x i16> %x, <16 x i16>
+  %2 = add <16 x i16> %1,
+  ret <16 x i16> %2
+}
+
+define <16 x i16> @test26(<16 x i16> %x) {
+; SSE2-LABEL: test26:
+; SSE2: # %bb.0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; SSE2-NEXT: pxor %xmm2, %xmm1
+; SSE2-NEXT: pmaxsw {{.*}}(%rip), %xmm1
+; SSE2-NEXT: pxor %xmm2, %xmm1
+; SSE2-NEXT: pxor %xmm2, %xmm0
+; SSE2-NEXT: pmaxsw {{.*}}(%rip), %xmm0
+; SSE2-NEXT: pxor %xmm2, %xmm0
+; SSE2-NEXT: paddw {{.*}}(%rip), %xmm0
+; SSE2-NEXT: paddw {{.*}}(%rip), %xmm1
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: test26:
+; SSSE3: # %bb.0: # %entry
+; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; SSSE3-NEXT: pxor %xmm2, %xmm1
+; SSSE3-NEXT: pmaxsw {{.*}}(%rip), %xmm1
+; SSSE3-NEXT: pxor %xmm2, %xmm1
+; SSSE3-NEXT: pxor %xmm2, %xmm0
+; SSSE3-NEXT: pmaxsw {{.*}}(%rip), %xmm0
+; SSSE3-NEXT: pxor %xmm2, %xmm0
+; SSSE3-NEXT: paddw {{.*}}(%rip), %xmm0
+; SSSE3-NEXT: paddw {{.*}}(%rip), %xmm1
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: test26:
+; SSE41: # %bb.0: # %entry
+; SSE41-NEXT: pmaxuw {{.*}}(%rip), %xmm1
+; SSE41-NEXT: pmaxuw {{.*}}(%rip), %xmm0
+; SSE41-NEXT: paddw {{.*}}(%rip), %xmm0
+; SSE41-NEXT: paddw {{.*}}(%rip), %xmm1
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: test26:
+; AVX1: # %bb.0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpmaxuw {{.*}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vpmaxuw {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vpaddw {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vpaddw {{.*}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test26:
+; AVX2: # %bb.0: # %entry
+; AVX2-NEXT: vpmaxuw {{.*}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT: vpaddw {{.*}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: test26:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vpmaxuw {{.*}}(%rip), %ymm0, %ymm0
+; AVX512-NEXT: vpaddw {{.*}}(%rip), %ymm0, %ymm0
+; AVX512-NEXT: retq
+entry:
+  %0 = icmp ugt <16 x i16> %x,
+  %1 = select <16 x i1> %0, <16 x i16> %x, <16 x i16>
+  %2 = add <16 x i16> %1,
+  ret <16 x i16> %2
+}
+
+define <64 x i8> @test27(<64 x i8> %x) {
+; SSE-LABEL: test27:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: movdqa {{.*#+}} xmm4 = [154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154]
+; SSE-NEXT: pmaxub %xmm4, %xmm3
+; SSE-NEXT: pmaxub %xmm4, %xmm2
+; SSE-NEXT: pmaxub %xmm4, %xmm1
+; SSE-NEXT: pmaxub %xmm4, %xmm0
+; SSE-NEXT: movdqa {{.*#+}} xmm4 = [102,102,102,102,102,102,102,102,102,102,102,102,102,102,102,102]
+; SSE-NEXT: paddb %xmm4, %xmm0
+; SSE-NEXT: paddb %xmm4, %xmm1
+; SSE-NEXT: paddb %xmm4, %xmm2
+; SSE-NEXT: paddb %xmm4, %xmm3
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: test27:
+; AVX1: # %bb.0: # %entry
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154]
+; AVX1-NEXT: vpmaxub %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT: vpmaxub %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpmaxub %xmm2, %xmm0, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpmaxub %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [102,102,102,102,102,102,102,102,102,102,102,102,102,102,102,102]
+; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm4
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm4, %ymm0
+; AVX1-NEXT: vpaddb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpaddb %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test27:
+; AVX2: # %bb.0: # %entry
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154]
+; AVX2-NEXT: vpmaxub %ymm2, %ymm1, %ymm1
+; AVX2-NEXT: vpmaxub %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [102,102,102,102,102,102,102,102,102,102,102,102,102,102,102,102,102,102,102,102,102,102,102,102,102,102,102,102,102,102,102,102]
+; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpaddb %ymm2, %ymm1, %ymm1
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: test27:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vpmaxub {{.*}}(%rip), %zmm0, %zmm0
+; AVX512-NEXT: vpaddb {{.*}}(%rip), %zmm0, %zmm0
+; AVX512-NEXT: retq
+entry:
+  %0 = icmp ugt <64 x i8> %x,
+  %1 = select <64 x i1> %0, <64 x i8> %x, <64 x i8>
+  %2 = add <64 x i8> %1,
+  ret <64 x i8> %2
+}
+
+define <64 x i8> @test28(<64 x i8> %x) {
+; SSE-LABEL: test28:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: movdqa {{.*#+}} xmm4 = [1,234,206,142,179,186,123,98,63,19,234,100,25,34,55,70]
+; SSE-NEXT: pmaxub %xmm4, %xmm2
+; SSE-NEXT: pmaxub %xmm4, %xmm0
+; SSE-NEXT: pmaxub {{.*}}(%rip), %xmm3
+; SSE-NEXT: pmaxub {{.*}}(%rip), %xmm1
+; SSE-NEXT: movdqa {{.*#+}} xmm4 = [255,22,50,114,77,70,133,158,193,237,22,156,231,222,201,186]
+; SSE-NEXT: paddb %xmm4, %xmm0
+; SSE-NEXT: paddb %xmm4, %xmm2
+; SSE-NEXT: paddb {{.*}}(%rip), %xmm1
+; SSE-NEXT: paddb {{.*}}(%rip), %xmm3
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: test28:
+; AVX1: # %bb.0: # %entry
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [1,234,206,142,179,186,123,98,63,19,234,100,25,34,55,70]
+; AVX1-NEXT: vpmaxub %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vpmaxub %xmm2, %xmm0, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT: vpmaxub {{.*}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpmaxub {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [255,22,50,114,77,70,133,158,193,237,22,156,231,222,201,186]
+; AVX1-NEXT: vpaddb %xmm4, %xmm2, %xmm2
+; AVX1-NEXT: vpaddb {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
+; AVX1-NEXT: vpaddb %xmm4, %xmm3, %xmm2
+; AVX1-NEXT: vpaddb {{.*}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test28:
+; AVX2: # %bb.0: # %entry
+; AVX2-NEXT: vpmaxub {{.*}}(%rip), %ymm1, %ymm1
+; AVX2-NEXT: vpmaxub {{.*}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT: vpaddb {{.*}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT: vpaddb {{.*}}(%rip), %ymm1, %ymm1
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: test28:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vpmaxub {{.*}}(%rip), %zmm0, %zmm0
+; AVX512-NEXT: vpaddb {{.*}}(%rip), %zmm0, %zmm0
+; AVX512-NEXT: retq
+entry:
+  %0 = icmp ugt <64 x i8> %x,
+  %1 = select <64 x i1> %0, <64 x i8> %x, <64 x i8>
+  %2 = add <64 x i8> %1,
+  ret <64 x i8> %2
+}
+
+define <32 x i16> @test29(<32 x i16> %x) {
+; SSE2-LABEL: test29:
+; SSE2: # %bb.0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; SSE2-NEXT: pxor %xmm4, %xmm3
+; SSE2-NEXT: pmaxsw {{.*}}(%rip), %xmm3
+; SSE2-NEXT: pxor %xmm4, %xmm3
+; SSE2-NEXT: pxor %xmm4, %xmm2
+; SSE2-NEXT: pmaxsw {{.*}}(%rip), %xmm2
+; SSE2-NEXT: pxor %xmm4, %xmm2
+; SSE2-NEXT: pxor %xmm4, %xmm1
+; SSE2-NEXT: pmaxsw {{.*}}(%rip), %xmm1
+; SSE2-NEXT: pxor %xmm4, %xmm1
+; SSE2-NEXT: pxor %xmm4, %xmm0
+; SSE2-NEXT: pmaxsw {{.*}}(%rip), %xmm0
+; SSE2-NEXT: pxor %xmm4, %xmm0
+; SSE2-NEXT: paddw {{.*}}(%rip), %xmm0
+; SSE2-NEXT: paddw {{.*}}(%rip), %xmm1
+; SSE2-NEXT: paddw {{.*}}(%rip), %xmm2
+; SSE2-NEXT: paddw {{.*}}(%rip), %xmm3
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: test29:
+; SSSE3: # %bb.0: # %entry
+; SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; SSSE3-NEXT: pxor %xmm4, %xmm3
+; SSSE3-NEXT: pmaxsw {{.*}}(%rip), %xmm3
+; SSSE3-NEXT: pxor %xmm4, %xmm3
+; SSSE3-NEXT: pxor %xmm4, %xmm2
+; SSSE3-NEXT: pmaxsw {{.*}}(%rip), %xmm2
+; SSSE3-NEXT: pxor %xmm4, %xmm2
+; SSSE3-NEXT: pxor %xmm4, %xmm1
+; SSSE3-NEXT: pmaxsw {{.*}}(%rip), %xmm1
+; SSSE3-NEXT: pxor %xmm4, %xmm1
+; SSSE3-NEXT: pxor %xmm4, %xmm0
+; SSSE3-NEXT: pmaxsw {{.*}}(%rip), %xmm0
+; SSSE3-NEXT: pxor %xmm4, %xmm0
+; SSSE3-NEXT: paddw {{.*}}(%rip), %xmm0
+; SSSE3-NEXT: paddw {{.*}}(%rip), %xmm1
+; SSSE3-NEXT: paddw {{.*}}(%rip), %xmm2
+; SSSE3-NEXT: paddw {{.*}}(%rip), %xmm3
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: test29:
+; SSE41: # %bb.0: # %entry
+; SSE41-NEXT: pmaxuw {{.*}}(%rip), %xmm3
+; SSE41-NEXT: pmaxuw {{.*}}(%rip), %xmm2
+; SSE41-NEXT: pmaxuw {{.*}}(%rip), %xmm1
+; SSE41-NEXT: pmaxuw {{.*}}(%rip), %xmm0
+; SSE41-NEXT: paddw {{.*}}(%rip), %xmm0
+; SSE41-NEXT: paddw {{.*}}(%rip), %xmm1
+; SSE41-NEXT: paddw {{.*}}(%rip), %xmm2
+; SSE41-NEXT: paddw {{.*}}(%rip), %xmm3
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: test29:
+; AVX1: # %bb.0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vpmaxuw {{.*}}(%rip), %xmm2, %xmm2
+; AVX1-NEXT: vpmaxuw {{.*}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpmaxuw {{.*}}(%rip), %xmm3, %xmm3
+; AVX1-NEXT: vpmaxuw {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vpaddw {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vpaddw {{.*}}(%rip), %xmm3, %xmm3
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
+; AVX1-NEXT: vpaddw {{.*}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vpaddw {{.*}}(%rip), %xmm2, %xmm2
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test29:
+; AVX2: # %bb.0: # %entry
+; AVX2-NEXT: vpmaxuw {{.*}}(%rip), %ymm1, %ymm1
+; AVX2-NEXT: vpmaxuw {{.*}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT: vpaddw {{.*}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT: vpaddw {{.*}}(%rip), %ymm1, %ymm1
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: test29:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vpmaxuw {{.*}}(%rip), %zmm0, %zmm0
+; AVX512-NEXT: vpaddw {{.*}}(%rip), %zmm0, %zmm0
+; AVX512-NEXT: retq
+entry:
+  %0 = icmp ugt <32 x i16> %x,
+  %1 = select <32 x i1> %0, <32 x i16> %x, <32 x i16>
+  %2 = add <32 x i16> %1,
+  ret <32 x i16> %2
+}
-- 
2.50.1