From: Simon Pilgrim Date: Thu, 31 Jan 2019 11:55:30 +0000 (+0000) Subject: [X86] combineExtractWithShuffle - more aggressively peek through bitcasts X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=0540ba21085e3b9d1f7889d6ba8da6bf383fa8ab;p=llvm [X86] combineExtractWithShuffle - more aggressively peek through bitcasts Fixes regression introduced by rL352743 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@352745 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 0c400980876..cb5dce233f7 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -33861,15 +33861,19 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG, if (SrcSVT == MVT::i1 || !isa<ConstantSDNode>(Idx)) return SDValue(); + SDValue SrcBC = peekThroughBitcasts(Src); + // Handle extract(broadcast(scalar_value)), it doesn't matter what index is. - if (X86ISD::VBROADCAST == Src.getOpcode() && - Src.getOperand(0).getValueType() == VT) - return Src.getOperand(0); + if (X86ISD::VBROADCAST == SrcBC.getOpcode()) { + SDValue SrcOp = SrcBC.getOperand(0); + if (SrcOp.getValueSizeInBits() == VT.getSizeInBits()) + return DAG.getBitcast(VT, SrcOp); + } // Resolve the target shuffle inputs and mask. SmallVector<int, 16> Mask; SmallVector<SDValue, 2> Ops; - if (!resolveTargetShuffleInputs(peekThroughBitcasts(Src), Ops, Mask, DAG)) + if (!resolveTargetShuffleInputs(SrcBC, Ops, Mask, DAG)) return SDValue(); // Attempt to narrow/widen the shuffle mask to the correct size. 
diff --git a/test/CodeGen/X86/extractelement-load.ll b/test/CodeGen/X86/extractelement-load.ll index fbebc07aae0..f1d31b83a77 100644 --- a/test/CodeGen/X86/extractelement-load.ll +++ b/test/CodeGen/X86/extractelement-load.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X32-SSE2 -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=X64-SSSE3 -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=X64-AVX +; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X32-SSE2 +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=X64,X64-SSSE3 +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=X64,X64-AVX target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" @@ -12,15 +12,10 @@ define i32 @t(<2 x i64>* %val) nounwind { ; X32-SSE2-NEXT: movl 8(%eax), %eax ; X32-SSE2-NEXT: retl ; -; X64-SSSE3-LABEL: t: -; X64-SSSE3: # %bb.0: -; X64-SSSE3-NEXT: movl 8(%rdi), %eax -; X64-SSSE3-NEXT: retq -; -; X64-AVX-LABEL: t: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: movl 8(%rdi), %eax -; X64-AVX-NEXT: retq +; X64-LABEL: t: +; X64: # %bb.0: +; X64-NEXT: movl 8(%rdi), %eax +; X64-NEXT: retq %tmp2 = load <2 x i64>, <2 x i64>* %val, align 16 ; <<2 x i64>> [#uses=1] %tmp3 = bitcast <2 x i64> %tmp2 to <4 x i32> ; <<4 x i32>> [#uses=1] %tmp4 = extractelement <4 x i32> %tmp3, i32 2 ; [#uses=1] @@ -34,13 +29,9 @@ define i32 @t2(<8 x i32>* %xp) { ; X32-SSE2: # %bb.0: ; X32-SSE2-NEXT: retl ; -; X64-SSSE3-LABEL: t2: -; X64-SSSE3: # %bb.0: -; X64-SSSE3-NEXT: retq -; -; X64-AVX-LABEL: t2: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: retq +; X64-LABEL: t2: +; X64: # %bb.0: +; X64-NEXT: retq %x = load <8 x i32>, <8 x i32>* %xp %Shuff68 = shufflevector <8 x i32> %x, <8 x i32> undef, <8 x i32> %y = extractelement <8 x i32> %Shuff68, i32 
0 @@ -91,16 +82,10 @@ define i64 @t4(<2 x double>* %a) { ; X32-SSE2-NEXT: movd %xmm0, %edx ; X32-SSE2-NEXT: retl ; -; X64-SSSE3-LABEL: t4: -; X64-SSSE3: # %bb.0: -; X64-SSSE3-NEXT: movq (%rdi), %rax -; X64-SSSE3-NEXT: retq -; -; X64-AVX-LABEL: t4: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0] -; X64-AVX-NEXT: vpextrq $1, %xmm0, %rax -; X64-AVX-NEXT: retq +; X64-LABEL: t4: +; X64: # %bb.0: +; X64-NEXT: movq (%rdi), %rax +; X64-NEXT: retq %b = load <2 x double>, <2 x double>* %a, align 16 %c = shufflevector <2 x double> %b, <2 x double> %b, <2 x i32> %d = bitcast <2 x double> %c to <2 x i64>