From: Simon Pilgrim Date: Mon, 2 Oct 2017 15:43:26 +0000 (+0000) Subject: [X86][SSE] Add PACKSS/PACKUS constant folding tests X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=6b14a36476a24dfa8b2503c2d4b2310803519629;p=llvm [X86][SSE] Add PACKSS/PACKUS constant folding tests git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@314682 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/test/CodeGen/X86/avx2-intrinsics-x86.ll b/test/CodeGen/X86/avx2-intrinsics-x86.ll index 2082854c0d0..7c9874e9a48 100644 --- a/test/CodeGen/X86/avx2-intrinsics-x86.ll +++ b/test/CodeGen/X86/avx2-intrinsics-x86.ll @@ -18,6 +18,25 @@ define <16 x i16> @test_x86_avx2_packssdw(<8 x i32> %a0, <8 x i32> %a1) { declare <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32>, <8 x i32>) nounwind readnone +define <16 x i16> @test_x86_avx2_packssdw_fold() { +; AVX2-LABEL: test_x86_avx2_packssdw_fold: +; AVX2: ## BB#0: +; AVX2-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0] +; AVX2-NEXT: vpackssdw LCPI1_0, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x6b,0x05,A,A,A,A] +; AVX2-NEXT: ## fixup A - offset: 4, value: LCPI1_0, kind: FK_Data_4 +; AVX2-NEXT: retl ## encoding: [0xc3] +; +; AVX512VL-LABEL: test_x86_avx2_packssdw_fold: +; AVX512VL: ## BB#0: +; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0] +; AVX512VL-NEXT: vpackssdw LCPI1_0, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6b,0x05,A,A,A,A] +; AVX512VL-NEXT: ## fixup A - offset: 4, value: LCPI1_0, kind: FK_Data_4 +; AVX512VL-NEXT: retl ## encoding: [0xc3] + %res = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> zeroinitializer, <8 x i32> ) + ret <16 x i16> %res +} + + define <32 x i8> @test_x86_avx2_packsswb(<16 x i16> %a0, <16 x i16> %a1) { ; AVX2-LABEL: test_x86_avx2_packsswb: ; AVX2: ## BB#0: @@ -34,6 +53,29 @@ define <32 x i8> @test_x86_avx2_packsswb(<16 x i16> %a0, <16 x i16> %a1) { declare <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16>, <16 x 
i16>) nounwind readnone +define <32 x i8> @test_x86_avx2_packsswb_fold() { +; AVX2-LABEL: test_x86_avx2_packsswb_fold: +; AVX2: ## BB#0: +; AVX2-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0] +; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,255,256,65535,65535,65281,65280,32858,0,255,256,65535,65535,65281,65280,32858] +; AVX2-NEXT: ## encoding: [0xc5,0xfd,0x6f,0x0d,A,A,A,A] +; AVX2-NEXT: ## fixup A - offset: 4, value: LCPI3_0, kind: FK_Data_4 +; AVX2-NEXT: vpacksswb %ymm0, %ymm1, %ymm0 ## encoding: [0xc5,0xf5,0x63,0xc0] +; AVX2-NEXT: retl ## encoding: [0xc3] +; +; AVX512VL-LABEL: test_x86_avx2_packsswb_fold: +; AVX512VL: ## BB#0: +; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0] +; AVX512VL-NEXT: vmovdqa LCPI3_0, %ymm1 ## EVEX TO VEX Compression ymm1 = [0,255,256,65535,65535,65281,65280,32858,0,255,256,65535,65535,65281,65280,32858] +; AVX512VL-NEXT: ## encoding: [0xc5,0xfd,0x6f,0x0d,A,A,A,A] +; AVX512VL-NEXT: ## fixup A - offset: 4, value: LCPI3_0, kind: FK_Data_4 +; AVX512VL-NEXT: vpacksswb %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0x63,0xc0] +; AVX512VL-NEXT: retl ## encoding: [0xc3] + %res = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> <i16 0, i16 255, i16 256, i16 65535, i16 -1, i16 -255, i16 -256, i16 -32678, i16 0, i16 255, i16 256, i16 65535, i16 -1, i16 -255, i16 -256, i16 -32678>, <16 x i16> zeroinitializer) + ret <32 x i8> %res +} + + define <32 x i8> @test_x86_avx2_packuswb(<16 x i16> %a0, <16 x i16> %a1) { ; AVX2-LABEL: test_x86_avx2_packuswb: ; AVX2: ## BB#0: @@ -50,6 +92,29 @@ define <32 x i8> @test_x86_avx2_packuswb(<16 x i16> %a0, <16 x i16> %a1) { declare <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16>, <16 x i16>) nounwind readnone +define <32 x i8> @test_x86_avx2_packuswb_fold() { +; AVX2-LABEL: test_x86_avx2_packuswb_fold: +; AVX2: ## BB#0: +; AVX2-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0] +; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,255,256,65535,65535,65281,65280,32858,0,255,256,65535,65535,65281,65280,32858] +; AVX2-NEXT: ## encoding: [0xc5,0xfd,0x6f,0x0d,A,A,A,A] +; AVX2-NEXT: 
## fixup A - offset: 4, value: LCPI5_0, kind: FK_Data_4 +; AVX2-NEXT: vpackuswb %ymm0, %ymm1, %ymm0 ## encoding: [0xc5,0xf5,0x67,0xc0] +; AVX2-NEXT: retl ## encoding: [0xc3] +; +; AVX512VL-LABEL: test_x86_avx2_packuswb_fold: +; AVX512VL: ## BB#0: +; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0] +; AVX512VL-NEXT: vmovdqa LCPI5_0, %ymm1 ## EVEX TO VEX Compression ymm1 = [0,255,256,65535,65535,65281,65280,32858,0,255,256,65535,65535,65281,65280,32858] +; AVX512VL-NEXT: ## encoding: [0xc5,0xfd,0x6f,0x0d,A,A,A,A] +; AVX512VL-NEXT: ## fixup A - offset: 4, value: LCPI5_0, kind: FK_Data_4 +; AVX512VL-NEXT: vpackuswb %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0x67,0xc0] +; AVX512VL-NEXT: retl ## encoding: [0xc3] + %res = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> <i16 0, i16 255, i16 256, i16 65535, i16 -1, i16 -255, i16 -256, i16 -32678, i16 0, i16 255, i16 256, i16 65535, i16 -1, i16 -255, i16 -256, i16 -32678>, <16 x i16> zeroinitializer) + ret <32 x i8> %res +} + + define <32 x i8> @test_x86_avx2_padds_b(<32 x i8> %a0, <32 x i8> %a1) { ; AVX2-LABEL: test_x86_avx2_padds_b: ; AVX2: ## BB#0: @@ -782,6 +847,25 @@ define <16 x i16> @test_x86_avx2_packusdw(<8 x i32> %a0, <8 x i32> %a1) { declare <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32>, <8 x i32>) nounwind readnone +define <16 x i16> @test_x86_avx2_packusdw_fold() { +; AVX2-LABEL: test_x86_avx2_packusdw_fold: +; AVX2: ## BB#0: +; AVX2-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0] +; AVX2-NEXT: vpackusdw LCPI55_0, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x2b,0x05,A,A,A,A] +; AVX2-NEXT: ## fixup A - offset: 5, value: LCPI55_0, kind: FK_Data_4 +; AVX2-NEXT: retl ## encoding: [0xc3] +; +; AVX512VL-LABEL: test_x86_avx2_packusdw_fold: +; AVX512VL: ## BB#0: +; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0] +; AVX512VL-NEXT: vpackusdw LCPI55_0, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x2b,0x05,A,A,A,A] +; AVX512VL-NEXT: ## fixup A - offset: 5, value: LCPI55_0, kind: FK_Data_4 +; AVX512VL-NEXT: 
retl ## encoding: [0xc3] + %res = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> zeroinitializer, <8 x i32> ) + ret <16 x i16> %res +} + + define <32 x i8> @test_x86_avx2_pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %a2) { ; CHECK-LABEL: test_x86_avx2_pblendvb: ; CHECK: ## BB#0: @@ -1247,18 +1331,18 @@ define <4 x i32> @test_x86_avx2_psrav_d_const(<4 x i32> %a0, <4 x i32> %a1) { ; AVX2: ## BB#0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm0 = [2,9,4294967284,23] ; AVX2-NEXT: ## encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A] -; AVX2-NEXT: ## fixup A - offset: 4, value: LCPI84_0, kind: FK_Data_4 -; AVX2-NEXT: vpsravd LCPI84_1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x46,0x05,A,A,A,A] -; AVX2-NEXT: ## fixup A - offset: 5, value: LCPI84_1, kind: FK_Data_4 +; AVX2-NEXT: ## fixup A - offset: 4, value: LCPI88_0, kind: FK_Data_4 +; AVX2-NEXT: vpsravd LCPI88_1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x46,0x05,A,A,A,A] +; AVX2-NEXT: ## fixup A - offset: 5, value: LCPI88_1, kind: FK_Data_4 ; AVX2-NEXT: retl ## encoding: [0xc3] ; ; AVX512VL-LABEL: test_x86_avx2_psrav_d_const: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vmovdqa LCPI84_0, %xmm0 ## EVEX TO VEX Compression xmm0 = [2,9,4294967284,23] +; AVX512VL-NEXT: vmovdqa LCPI88_0, %xmm0 ## EVEX TO VEX Compression xmm0 = [2,9,4294967284,23] ; AVX512VL-NEXT: ## encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A] -; AVX512VL-NEXT: ## fixup A - offset: 4, value: LCPI84_0, kind: FK_Data_4 -; AVX512VL-NEXT: vpsravd LCPI84_1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x46,0x05,A,A,A,A] -; AVX512VL-NEXT: ## fixup A - offset: 5, value: LCPI84_1, kind: FK_Data_4 +; AVX512VL-NEXT: ## fixup A - offset: 4, value: LCPI88_0, kind: FK_Data_4 +; AVX512VL-NEXT: vpsravd LCPI88_1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x46,0x05,A,A,A,A] +; AVX512VL-NEXT: ## fixup A - offset: 5, value: LCPI88_1, kind: FK_Data_4 ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> , <4 x i32> 
) ret <4 x i32> %res @@ -1284,18 +1368,18 @@ define <8 x i32> @test_x86_avx2_psrav_d_256_const(<8 x i32> %a0, <8 x i32> %a1) ; AVX2: ## BB#0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51] ; AVX2-NEXT: ## encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A] -; AVX2-NEXT: ## fixup A - offset: 4, value: LCPI86_0, kind: FK_Data_4 -; AVX2-NEXT: vpsravd LCPI86_1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A] -; AVX2-NEXT: ## fixup A - offset: 5, value: LCPI86_1, kind: FK_Data_4 +; AVX2-NEXT: ## fixup A - offset: 4, value: LCPI90_0, kind: FK_Data_4 +; AVX2-NEXT: vpsravd LCPI90_1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A] +; AVX2-NEXT: ## fixup A - offset: 5, value: LCPI90_1, kind: FK_Data_4 ; AVX2-NEXT: retl ## encoding: [0xc3] ; ; AVX512VL-LABEL: test_x86_avx2_psrav_d_256_const: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vmovdqa LCPI86_0, %ymm0 ## EVEX TO VEX Compression ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51] +; AVX512VL-NEXT: vmovdqa LCPI90_0, %ymm0 ## EVEX TO VEX Compression ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51] ; AVX512VL-NEXT: ## encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A] -; AVX512VL-NEXT: ## fixup A - offset: 4, value: LCPI86_0, kind: FK_Data_4 -; AVX512VL-NEXT: vpsravd LCPI86_1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A] -; AVX512VL-NEXT: ## fixup A - offset: 5, value: LCPI86_1, kind: FK_Data_4 +; AVX512VL-NEXT: ## fixup A - offset: 4, value: LCPI90_0, kind: FK_Data_4 +; AVX512VL-NEXT: vpsravd LCPI90_1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A] +; AVX512VL-NEXT: ## fixup A - offset: 5, value: LCPI90_1, kind: FK_Data_4 ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> , <8 x i32> ) ret <8 x i32> %res diff --git a/test/CodeGen/X86/sse2-intrinsics-x86.ll b/test/CodeGen/X86/sse2-intrinsics-x86.ll index 7bccc4ab4ca..e6d3057fc5d 100644 --- 
a/test/CodeGen/X86/sse2-intrinsics-x86.ll +++ b/test/CodeGen/X86/sse2-intrinsics-x86.ll @@ -660,8 +660,6 @@ define i32 @test_x86_sse2_movmsk_pd(<2 x double> %a0) { declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone - - define <8 x i16> @test_x86_sse2_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) { ; SSE-LABEL: test_x86_sse2_packssdw_128: ; SSE: ## BB#0: @@ -683,6 +681,32 @@ define <8 x i16> @test_x86_sse2_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) { declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone +define <8 x i16> @test_x86_sse2_packssdw_128_fold() { +; SSE-LABEL: test_x86_sse2_packssdw_128_fold: +; SSE: ## BB#0: +; SSE-NEXT: pxor %xmm0, %xmm0 ## encoding: [0x66,0x0f,0xef,0xc0] +; SSE-NEXT: packssdw LCPI32_0, %xmm0 ## encoding: [0x66,0x0f,0x6b,0x05,A,A,A,A] +; SSE-NEXT: ## fixup A - offset: 4, value: LCPI32_0, kind: FK_Data_4 +; SSE-NEXT: retl ## encoding: [0xc3] +; +; AVX2-LABEL: test_x86_sse2_packssdw_128_fold: +; AVX2: ## BB#0: +; AVX2-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0] +; AVX2-NEXT: vpackssdw LCPI32_0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x6b,0x05,A,A,A,A] +; AVX2-NEXT: ## fixup A - offset: 4, value: LCPI32_0, kind: FK_Data_4 +; AVX2-NEXT: retl ## encoding: [0xc3] +; +; SKX-LABEL: test_x86_sse2_packssdw_128_fold: +; SKX: ## BB#0: +; SKX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0] +; SKX-NEXT: vpackssdw LCPI32_0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6b,0x05,A,A,A,A] +; SKX-NEXT: ## fixup A - offset: 4, value: LCPI32_0, kind: FK_Data_4 +; SKX-NEXT: retl ## encoding: [0xc3] + %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> zeroinitializer, <4 x i32> ) + ret <8 x i16> %res +} + + define <16 x i8> @test_x86_sse2_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) { ; SSE-LABEL: test_x86_sse2_packsswb_128: ; SSE: ## BB#0: @@ -704,6 +728,38 @@ define <16 x i8> @test_x86_sse2_packsswb_128(<8 x i16> %a0, <8 x 
i16> %a1) { declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone +define <16 x i8> @test_x86_sse2_packsswb_128_fold() { +; SSE-LABEL: test_x86_sse2_packsswb_128_fold: +; SSE: ## BB#0: +; SSE-NEXT: pxor %xmm1, %xmm1 ## encoding: [0x66,0x0f,0xef,0xc9] +; SSE-NEXT: movdqa {{.*#+}} xmm0 = [0,255,256,65535,65535,65281,65280,32858] +; SSE-NEXT: ## encoding: [0x66,0x0f,0x6f,0x05,A,A,A,A] +; SSE-NEXT: ## fixup A - offset: 4, value: LCPI34_0, kind: FK_Data_4 +; SSE-NEXT: packsswb %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x63,0xc1] +; SSE-NEXT: retl ## encoding: [0xc3] +; +; AVX2-LABEL: test_x86_sse2_packsswb_128_fold: +; AVX2: ## BB#0: +; AVX2-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [0,255,256,65535,65535,65281,65280,32858] +; AVX2-NEXT: ## encoding: [0xc5,0xf9,0x6f,0x0d,A,A,A,A] +; AVX2-NEXT: ## fixup A - offset: 4, value: LCPI34_0, kind: FK_Data_4 +; AVX2-NEXT: vpacksswb %xmm0, %xmm1, %xmm0 ## encoding: [0xc5,0xf1,0x63,0xc0] +; AVX2-NEXT: retl ## encoding: [0xc3] +; +; SKX-LABEL: test_x86_sse2_packsswb_128_fold: +; SKX: ## BB#0: +; SKX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0] +; SKX-NEXT: vmovdqa LCPI34_0, %xmm1 ## EVEX TO VEX Compression xmm1 = [0,255,256,65535,65535,65281,65280,32858] +; SKX-NEXT: ## encoding: [0xc5,0xf9,0x6f,0x0d,A,A,A,A] +; SKX-NEXT: ## fixup A - offset: 4, value: LCPI34_0, kind: FK_Data_4 +; SKX-NEXT: vpacksswb %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x63,0xc0] +; SKX-NEXT: retl ## encoding: [0xc3] + %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> <i16 0, i16 255, i16 256, i16 65535, i16 -1, i16 -255, i16 -256, i16 -32678>, <8 x i16> zeroinitializer) + ret <16 x i8> %res +} + + define <16 x i8> @test_x86_sse2_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) { ; SSE-LABEL: test_x86_sse2_packuswb_128: ; SSE: ## BB#0: @@ -725,6 +781,38 @@ define <16 x i8> @test_x86_sse2_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) { declare <16 x i8> 
@llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone +define <16 x i8> @test_x86_sse2_packuswb_128_fold() { +; SSE-LABEL: test_x86_sse2_packuswb_128_fold: +; SSE: ## BB#0: +; SSE-NEXT: pxor %xmm1, %xmm1 ## encoding: [0x66,0x0f,0xef,0xc9] +; SSE-NEXT: movdqa {{.*#+}} xmm0 = [0,255,256,65535,65535,65281,65280,32858] +; SSE-NEXT: ## encoding: [0x66,0x0f,0x6f,0x05,A,A,A,A] +; SSE-NEXT: ## fixup A - offset: 4, value: LCPI36_0, kind: FK_Data_4 +; SSE-NEXT: packuswb %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x67,0xc1] +; SSE-NEXT: retl ## encoding: [0xc3] +; +; AVX2-LABEL: test_x86_sse2_packuswb_128_fold: +; AVX2: ## BB#0: +; AVX2-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [0,255,256,65535,65535,65281,65280,32858] +; AVX2-NEXT: ## encoding: [0xc5,0xf9,0x6f,0x0d,A,A,A,A] +; AVX2-NEXT: ## fixup A - offset: 4, value: LCPI36_0, kind: FK_Data_4 +; AVX2-NEXT: vpackuswb %xmm0, %xmm1, %xmm0 ## encoding: [0xc5,0xf1,0x67,0xc0] +; AVX2-NEXT: retl ## encoding: [0xc3] +; +; SKX-LABEL: test_x86_sse2_packuswb_128_fold: +; SKX: ## BB#0: +; SKX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0] +; SKX-NEXT: vmovdqa LCPI36_0, %xmm1 ## EVEX TO VEX Compression xmm1 = [0,255,256,65535,65535,65281,65280,32858] +; SKX-NEXT: ## encoding: [0xc5,0xf9,0x6f,0x0d,A,A,A,A] +; SKX-NEXT: ## fixup A - offset: 4, value: LCPI36_0, kind: FK_Data_4 +; SKX-NEXT: vpackuswb %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x67,0xc0] +; SKX-NEXT: retl ## encoding: [0xc3] + %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> <i16 0, i16 255, i16 256, i16 65535, i16 -1, i16 -255, i16 -256, i16 -32678>, <8 x i16> zeroinitializer) + ret <16 x i8> %res +} + + define <16 x i8> @test_x86_sse2_padds_b(<16 x i8> %a0, <16 x i8> %a1) { ; SSE-LABEL: test_x86_sse2_padds_b: ; SSE: ## BB#0: diff --git a/test/CodeGen/X86/sse41-intrinsics-x86.ll b/test/CodeGen/X86/sse41-intrinsics-x86.ll index 3abfcf4d542..39acde0b5cd 100644 --- 
a/test/CodeGen/X86/sse41-intrinsics-x86.ll +++ b/test/CodeGen/X86/sse41-intrinsics-x86.ll @@ -135,6 +135,32 @@ define <8 x i16> @test_x86_sse41_packusdw(<4 x i32> %a0, <4 x i32> %a1) { declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readnone +define <8 x i16> @test_x86_sse41_packusdw_fold() { +; SSE41-LABEL: test_x86_sse41_packusdw_fold: +; SSE41: ## BB#0: +; SSE41-NEXT: pxor %xmm0, %xmm0 ## encoding: [0x66,0x0f,0xef,0xc0] +; SSE41-NEXT: packusdw LCPI7_0, %xmm0 ## encoding: [0x66,0x0f,0x38,0x2b,0x05,A,A,A,A] +; SSE41-NEXT: ## fixup A - offset: 5, value: LCPI7_0, kind: FK_Data_4 +; SSE41-NEXT: retl ## encoding: [0xc3] +; +; AVX2-LABEL: test_x86_sse41_packusdw_fold: +; AVX2: ## BB#0: +; AVX2-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0] +; AVX2-NEXT: vpackusdw LCPI7_0, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x2b,0x05,A,A,A,A] +; AVX2-NEXT: ## fixup A - offset: 5, value: LCPI7_0, kind: FK_Data_4 +; AVX2-NEXT: retl ## encoding: [0xc3] +; +; SKX-LABEL: test_x86_sse41_packusdw_fold: +; SKX: ## BB#0: +; SKX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0] +; SKX-NEXT: vpackusdw LCPI7_0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x2b,0x05,A,A,A,A] +; SKX-NEXT: ## fixup A - offset: 5, value: LCPI7_0, kind: FK_Data_4 +; SKX-NEXT: retl ## encoding: [0xc3] + %res = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> zeroinitializer, <4 x i32> ) + ret <8 x i16> %res +} + + define <16 x i8> @test_x86_sse41_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) { ; SSE41-LABEL: test_x86_sse41_pblendvb: ; SSE41: ## BB#0: