From 3254c743c87df0aefff33424852df5ca3fbda277 Mon Sep 17 00:00:00 2001
From: Elena Demikhovsky
Date: Wed, 13 Sep 2017 06:40:26 +0000
Subject: [PATCH] [X86 CodeGen] Optimization of ZeroExtendLoad for v2i8 vector

Loads with zero-extension and sign-extension from v2i8 to v2i32 are "Legal"
since SSE4.1 and may be performed using the PMOVZXBD and PMOVSXBD
instructions.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@313121 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp |  1 +
 test/CodeGen/X86/vec_int_to_fp.ll  | 45 ++++------------------
 test/CodeGen/X86/vector-sext.ll    | 48 ++++++++++++++++++++++++++++++
 test/CodeGen/X86/vector-zext.ll    | 10 ++-----
 4 files changed, 57 insertions(+), 47 deletions(-)

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 906dfe02765..ee82c80625a 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -941,6 +941,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
       setLoadExtAction(LoadExtOp, MVT::v8i16, MVT::v8i8, Legal);
       setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i8, Legal);
+      setLoadExtAction(LoadExtOp, MVT::v2i32, MVT::v2i8, Legal);
       setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i8, Legal);
       setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i16, Legal);
       setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i16, Legal);
diff --git a/test/CodeGen/X86/vec_int_to_fp.ll b/test/CodeGen/X86/vec_int_to_fp.ll
index b8e94825ff1..38d4afa0950 100644
--- a/test/CodeGen/X86/vec_int_to_fp.ll
+++ b/test/CodeGen/X86/vec_int_to_fp.ll
@@ -3089,45 +3089,12 @@ define <2 x double> @uitofp_load_2i8_to_2f64(<2 x i8> *%a) {
 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
 ; SSE-NEXT: retq
 ;
-; VEX-LABEL: uitofp_load_2i8_to_2f64:
-; VEX: # BB#0:
-; VEX-NEXT: movzwl (%rdi), %eax
-; VEX-NEXT: vmovd %eax, %xmm0
-; VEX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; VEX-NEXT: vcvtdq2pd %xmm0, %xmm0
-; VEX-NEXT: retq
-;
-; AVX512F-LABEL: uitofp_load_2i8_to_2f64:
-; AVX512F: # BB#0:
-; AVX512F-NEXT: movzwl (%rdi), %eax
-; AVX512F-NEXT: vmovd %eax, %xmm0
-; AVX512F-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; AVX512F-NEXT: vcvtdq2pd %xmm0, %xmm0
-; AVX512F-NEXT: retq
-;
-; AVX512VL-LABEL: uitofp_load_2i8_to_2f64:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
-; AVX512VL-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; AVX512VL-NEXT: vcvtdq2pd %xmm0, %xmm0
-; AVX512VL-NEXT: retq
-;
-; AVX512DQ-LABEL: uitofp_load_2i8_to_2f64:
-; AVX512DQ: # BB#0:
-; AVX512DQ-NEXT: movzwl (%rdi), %eax
-; AVX512DQ-NEXT: vmovd %eax, %xmm0
-; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; AVX512DQ-NEXT: vcvtdq2pd %xmm0, %xmm0
-; AVX512DQ-NEXT: retq
-;
-; AVX512VLDQ-LABEL: uitofp_load_2i8_to_2f64:
-; AVX512VLDQ: # BB#0:
-; AVX512VLDQ-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512VLDQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
-; AVX512VLDQ-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; AVX512VLDQ-NEXT: vcvtdq2pd %xmm0, %xmm0
-; AVX512VLDQ-NEXT: retq
+; AVX-LABEL: uitofp_load_2i8_to_2f64:
+; AVX: # BB#0:
+; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
+; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
+; AVX-NEXT: retq
   %ld = load <2 x i8>, <2 x i8> *%a
   %cvt = uitofp <2 x i8> %ld to <2 x double>
   ret <2 x double> %cvt
diff --git a/test/CodeGen/X86/vector-sext.ll b/test/CodeGen/X86/vector-sext.ll
index 392c0de95f2..6e5cdf80031 100644
--- a/test/CodeGen/X86/vector-sext.ll
+++ b/test/CodeGen/X86/vector-sext.ll
@@ -5079,3 +5079,51 @@ define <32 x i8> @sext_32xi1_to_32xi8(<32 x i16> %c1, <32 x i16> %c2)nounwind {
   %b = sext <32 x i1> %a to <32 x i8>
   ret <32 x i8> %b
 }
+
+define <2 x i32> @sext_2i8_to_2i32(<2 x i8>* %addr) {
+; SSE2-LABEL: sext_2i8_to_2i32:
+; SSE2: # BB#0:
+; SSE2-NEXT: movzwl (%rdi), %eax
+; SSE2-NEXT: movd %eax, %xmm0
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSE2-NEXT: psrad $24, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
+; SSE2-NEXT: paddq %xmm0, %xmm0
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: sext_2i8_to_2i32:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: movzwl (%rdi), %eax
+; SSSE3-NEXT: movd %eax, %xmm0
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSSE3-NEXT: psrad $24, %xmm0
+; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
+; SSSE3-NEXT: paddq %xmm0, %xmm0
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: sext_2i8_to_2i32:
+; SSE41: # BB#0:
+; SSE41-NEXT: pmovsxbq (%rdi), %xmm0
+; SSE41-NEXT: paddq %xmm0, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: sext_2i8_to_2i32:
+; AVX: # BB#0:
+; AVX-NEXT: vpmovsxbq (%rdi), %xmm0
+; AVX-NEXT: vpaddq %xmm0, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; X32-SSE41-LABEL: sext_2i8_to_2i32:
+; X32-SSE41: # BB#0:
+; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE41-NEXT: pmovsxbq (%eax), %xmm0
+; X32-SSE41-NEXT: paddq %xmm0, %xmm0
+; X32-SSE41-NEXT: retl
+  %x = load <2 x i8>, <2 x i8>* %addr, align 1
+  %y = sext <2 x i8> %x to <2 x i32>
+  %z = add <2 x i32>%y, %y
+  ret <2 x i32>%z
+}
+
diff --git a/test/CodeGen/X86/vector-zext.ll b/test/CodeGen/X86/vector-zext.ll
index edc3aa3f5c3..13d90f4b0d7 100644
--- a/test/CodeGen/X86/vector-zext.ll
+++ b/test/CodeGen/X86/vector-zext.ll
@@ -2262,19 +2262,13 @@ define <2 x i32> @zext_2i8_to_2i32(<2 x i8>* %addr) {
 ;
 ; SSE41-LABEL: zext_2i8_to_2i32:
 ; SSE41: # BB#0:
-; SSE41-NEXT: movzwl (%rdi), %eax
-; SSE41-NEXT: movd %eax, %xmm0
-; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
+; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
 ; SSE41-NEXT: paddq %xmm0, %xmm0
 ; SSE41-NEXT: retq
 ;
 ; AVX-LABEL: zext_2i8_to_2i32:
 ; AVX: # BB#0:
-; AVX-NEXT: movzwl (%rdi), %eax
-; AVX-NEXT: vmovd %eax, %xmm0
-; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
+; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX-NEXT: vpaddq %xmm0, %xmm0, %xmm0
 ; AVX-NEXT: retq
   %x = load <2 x i8>, <2 x i8>* %addr, align 1
-- 
2.50.1
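Note (not part of the patch): the sketch below is a minimal, hypothetical FileCheck-style reproducer of the pattern this change targets. The function name and RUN line are illustrative, and the expected instructions are taken from the SSE41 CHECK lines of the zext_2i8_to_2i32 test added above: with the new setLoadExtAction entry, an SSE4.1 target selects a single extending vector load from memory (pmovzxbq here, since <2 x i32> is handled with 64-bit elements) instead of the earlier movzwl/movd load followed by pmovzxbd/pmovzxdq.

; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s

; Hypothetical reproducer: the zero-extending <2 x i8> load is matched as one
; extending load once the v2i32/v2i8 extload is marked Legal.
define <2 x i32> @zext_load_v2i8_to_v2i32(<2 x i8>* %p) {
; CHECK-LABEL: zext_load_v2i8_to_v2i32:
; CHECK: pmovzxbq (%rdi), %xmm0
; CHECK-NEXT: paddq %xmm0, %xmm0
; CHECK-NEXT: retq
  %v = load <2 x i8>, <2 x i8>* %p, align 1
  %e = zext <2 x i8> %v to <2 x i32>
  %r = add <2 x i32> %e, %e
  ret <2 x i32> %r
}

The same IR with sext in place of zext would select the sign-extending form (pmovsxbq), as covered by the sext_2i8_to_2i32 test in the patch.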