From: Craig Topper Date: Sun, 8 Sep 2019 19:24:29 +0000 (+0000) Subject: [X86] Teach materializeVectorConstant to not call getZeroVector/getOnesVector on... X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=cbda742932a075a5a48ce0c0a6541d8d9d12cfbc;p=llvm [X86] Teach materializeVectorConstant to not call getZeroVector/getOnesVector on the types we already have isel patterns for. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@371343 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 5e5ec5c6c8b..c7844220dfc 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -9137,7 +9137,8 @@ static SDValue materializeVectorConstant(SDValue Op, SelectionDAG &DAG, if (ISD::isBuildVectorAllZeros(Op.getNode())) { // Canonicalize this to <4 x i32> to 1) ensure the zero vectors are CSE'd // and 2) ensure that i64 scalars are eliminated on x86-32 hosts. - if (VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32) + if (VT.isFloatingPoint() || + VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32) return Op; return getZeroVector(VT, Subtarget, DAG, DL); @@ -9147,8 +9148,7 @@ static SDValue materializeVectorConstant(SDValue Op, SelectionDAG &DAG, // vectors or broken into v4i32 operations on 256-bit vectors. AVX2 can use // vpcmpeqd on 256-bit vectors. if (Subtarget.hasSSE2() && ISD::isBuildVectorAllOnes(Op.getNode())) { - if (VT == MVT::v4i32 || VT == MVT::v16i32 || - (VT == MVT::v8i32 && Subtarget.hasInt256())) + if (VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32) return Op; return getOnesVector(VT, DAG, DL); diff --git a/test/CodeGen/X86/fold-load-vec.ll b/test/CodeGen/X86/fold-load-vec.ll index 115f2bf7a5b..e8dc8f26ffa 100644 --- a/test/CodeGen/X86/fold-load-vec.ll +++ b/test/CodeGen/X86/fold-load-vec.ll @@ -12,7 +12,7 @@ define void @sample_test(<4 x float>* %source, <2 x float>* %dest) nounwind { ; CHECK-NEXT: movq %rsi, {{[0-9]+}}(%rsp) ; CHECK-NEXT: xorps %xmm0, %xmm0 ; CHECK-NEXT: movlps %xmm0, (%rsp) -; CHECK-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3] +; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] ; CHECK-NEXT: movlps %xmm0, (%rsp) ; CHECK-NEXT: movlps %xmm0, (%rsi) ; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rax