From: Simon Pilgrim
Date: Sat, 14 Oct 2017 19:57:19 +0000 (+0000)
Subject: [X86][SSE] Don't attempt to reduce the imul vector width of odd sized vectors (PR34947)
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=d83c62b88720f6ea904fd9cdd13b86bca8acea28;p=llvm

[X86][SSE] Don't attempt to reduce the imul vector width of odd sized vectors (PR34947)

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@315825 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index ad633747071..c08d79663fa 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -31476,6 +31476,9 @@ static SDValue reduceVMULWidth(SDNode *N, SelectionDAG &DAG,
   SDValue N1 = N->getOperand(1);
   EVT VT = N->getOperand(0).getValueType();
   unsigned NumElts = VT.getVectorNumElements();
+  if ((NumElts % 2) != 0)
+    return SDValue();
+
   unsigned RegSize = 128;
   MVT OpsVT = MVT::getVectorVT(MVT::i16, RegSize / 16);
   EVT ReducedVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16, NumElts);
@@ -31502,7 +31505,7 @@ static SDValue reduceVMULWidth(SDNode *N, SelectionDAG &DAG,
       // result.
       // Generate shuffle functioning as punpcklwd.
       SmallVector<int, 16> ShuffleMask(NumElts);
-      for (unsigned i = 0, e = NumElts/ 2; i < e; i++) {
+      for (unsigned i = 0, e = NumElts / 2; i < e; i++) {
         ShuffleMask[2 * i] = i;
         ShuffleMask[2 * i + 1] = i + NumElts;
       }
diff --git a/test/CodeGen/X86/shrink_vmul.ll b/test/CodeGen/X86/shrink_vmul.ll
index 504c8936442..79cf0f2c8f1 100644
--- a/test/CodeGen/X86/shrink_vmul.ll
+++ b/test/CodeGen/X86/shrink_vmul.ll
@@ -1349,3 +1349,108 @@ entry:
   store <2 x i32> %tmp13, <2 x i32>* %tmp15, align 4
   ret void
 }
+
+;
+; Illegal Types
+;
+
+define void @PR34947() {
+; X86-LABEL: PR34947:
+; X86: # BB#0:
+; X86-NEXT: movdqa (%eax), %xmm0
+; X86-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
+; X86-NEXT: movd %xmm1, %ecx
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: divl %ecx
+; X86-NEXT: movd %edx, %xmm1
+; X86-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
+; X86-NEXT: movd %xmm2, %ecx
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: divl %ecx
+; X86-NEXT: movd %edx, %xmm2
+; X86-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; X86-NEXT: movd %xmm0, %ecx
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: divl %ecx
+; X86-NEXT: movd %edx, %xmm1
+; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; X86-NEXT: movd %xmm0, %ecx
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: divl %ecx
+; X86-NEXT: movd %edx, %xmm0
+; X86-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X86-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: divl (%eax)
+; X86-NEXT: movd %edx, %xmm0
+; X86-NEXT: movdqa {{.*#+}} xmm2 = [8199,8199,8199,8199]
+; X86-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
+; X86-NEXT: pmuludq %xmm2, %xmm1
+; X86-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; X86-NEXT: pmuludq %xmm2, %xmm3
+; X86-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
+; X86-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; X86-NEXT: movl $8199, %eax # imm = 0x2007
+; X86-NEXT: movd %eax, %xmm2
+; X86-NEXT: pmuludq %xmm0, %xmm2
+; X86-NEXT: movd %xmm2, (%eax)
+; X86-NEXT: movdqa %xmm1, (%eax)
+; X86-NEXT: retl
+;
+; X64-LABEL: PR34947:
+; X64: # BB#0:
+; X64-NEXT: movdqa (%rax), %xmm0
+; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
+; X64-NEXT: movd %xmm1, %ecx
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: xorl %edx, %edx
+; X64-NEXT: divl %ecx
+; X64-NEXT: movd %edx, %xmm1
+; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
+; X64-NEXT: movd %xmm2, %ecx
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: xorl %edx, %edx
+; X64-NEXT: divl %ecx
+; X64-NEXT: movd %edx, %xmm2
+; X64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; X64-NEXT: movd %xmm0, %ecx
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: xorl %edx, %edx
+; X64-NEXT: divl %ecx
+; X64-NEXT: movd %edx, %xmm1
+; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; X64-NEXT: movd %xmm0, %ecx
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: xorl %edx, %edx
+; X64-NEXT: divl %ecx
+; X64-NEXT: movd %edx, %xmm0
+; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X64-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: xorl %edx, %edx
+; X64-NEXT: divl (%rax)
+; X64-NEXT: movd %edx, %xmm0
+; X64-NEXT: movdqa {{.*#+}} xmm2 = [8199,8199,8199,8199]
+; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
+; X64-NEXT: pmuludq %xmm2, %xmm1
+; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; X64-NEXT: pmuludq %xmm2, %xmm3
+; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
+; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; X64-NEXT: movl $8199, %eax # imm = 0x2007
+; X64-NEXT: movd %eax, %xmm2
+; X64-NEXT: pmuludq %xmm0, %xmm2
+; X64-NEXT: movd %xmm2, (%rax)
+; X64-NEXT: movdqa %xmm1, (%rax)
+; X64-NEXT: retq
+  %tmp = load <9 x i32>, <9 x i32>* undef, align 64
+  %rem = urem <9 x i32> zeroinitializer, %tmp
+  %mul = mul <9 x i32> <i32 8199, i32 8199, i32 8199, i32 8199, i32 8199, i32 8199, i32 8199, i32 8199, i32 8199>, %rem
+  store <9 x i32> %mul, <9 x i32>* undef, align 64
+  ret void
+}
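
For context, a standalone sketch (not LLVM code; makeInterleaveMask is a hypothetical
helper, with std::vector standing in for llvm::SmallVector) of the punpcklwd-style
mask construction touched by this patch and the new even-width guard. The mask loop
writes two entries per iteration for NumElts / 2 iterations, so an odd element count
such as the <9 x i32> in PR34947 leaves the last mask slot unfilled by the loop;
the combine now bails out early instead of attempting the repack.

  #include <cstdio>
  #include <vector>

  // Build the punpcklwd-style interleave mask used when repacking the low and
  // high halves of the widened multiply. Mirrors the patched behaviour: odd
  // element counts are rejected up front (the new "return SDValue()" bail-out).
  static std::vector<int> makeInterleaveMask(unsigned NumElts) {
    if ((NumElts % 2) != 0)
      return {}; // odd widths cannot be fully paired
    std::vector<int> ShuffleMask(NumElts);
    for (unsigned i = 0, e = NumElts / 2; i < e; i++) {
      ShuffleMask[2 * i] = i;               // lane from the low-half result
      ShuffleMask[2 * i + 1] = i + NumElts; // matching lane from the high-half result
    }
    return ShuffleMask;
  }

  int main() {
    for (unsigned NumElts : {8u, 9u}) {
      std::vector<int> Mask = makeInterleaveMask(NumElts);
      std::printf("NumElts=%u -> mask entries: %zu\n", NumElts, Mask.size());
      // NumElts=8 -> 8 entries (0,8,1,9,2,10,3,11)
      // NumElts=9 -> 0 entries: the combine is skipped, as for PR34947
    }
    return 0;
  }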