From bcfcf2585d222a496cc16a030e4963a4f342fe51 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Thu, 5 Sep 2019 15:07:07 +0000
Subject: [PATCH] [X86][SSE] EltsFromConsecutiveLoads - ignore non-zero offset base loads (PR43227)

As discussed on D64551 and PR43227, we don't correctly handle cases where the base load has a non-zero byte offset.

Until we can properly handle this, we must bail from EltsFromConsecutiveLoads.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@371078 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp |  4 ++++
 test/CodeGen/X86/load-partial.ll   | 21 +++++++++++++--------
 2 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index dc031459a27..844a29c01c1 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -7719,6 +7719,10 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
   int LoadSizeInBits = (1 + LastLoadedElt - FirstLoadedElt) * BaseSizeInBits;
   assert((BaseSizeInBits % 8) == 0 && "Sub-byte element loads detected");
 
+  // TODO: Support offsetting the base load.
+  if (ByteOffsets[FirstLoadedElt] != 0)
+    return SDValue();
+
   // Check to see if the element's load is consecutive to the base load
   // or offset from a previous (already checked) load.
   auto CheckConsecutiveLoad = [&](LoadSDNode *Base, int EltIdx) {
diff --git a/test/CodeGen/X86/load-partial.ll b/test/CodeGen/X86/load-partial.ll
index a32705ad8c9..4e1014fa28a 100644
--- a/test/CodeGen/X86/load-partial.ll
+++ b/test/CodeGen/X86/load-partial.ll
@@ -374,19 +374,24 @@ define void @PR43227(i32* %explicit_0, <8 x i32>* %explicit_1) {
 ;
 ; SSE41-LABEL: PR43227:
 ; SSE41:       # %bb.0:
-; SSE41-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE41-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSE41-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
-; SSE41-NEXT:    xorps %xmm0, %xmm0
-; SSE41-NEXT:    movaps %xmm0, 672(%rsi)
-; SSE41-NEXT:    movaps %xmm1, 688(%rsi)
+; SSE41-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
+; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; SSE41-NEXT:    pxor %xmm1, %xmm1
+; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
+; SSE41-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; SSE41-NEXT:    movdqa %xmm1, 672(%rsi)
+; SSE41-NEXT:    movdqa %xmm0, 688(%rsi)
 ; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: PR43227:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
 ; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
 ; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX-NEXT:    vmovaps %ymm0, 672(%rsi)
-- 
2.40.0