From: Sanjay Patel
Date: Sun, 22 Jan 2017 17:06:12 +0000 (+0000)
Subject: [x86] avoid crashing with illegal vector type (PR31672)
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=873d460b05a61a8e2eb58cedcff0a82090165410;p=llvm

[x86] avoid crashing with illegal vector type (PR31672)

https://llvm.org/bugs/show_bug.cgi?id=31672

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@292758 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index d1e4846928c..ead6e95b89c 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -28823,10 +28823,12 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
-/// If a vector select has an operand that is -1 or 0, simplify the select to a
-/// bitwise logic operation.
-static SDValue combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG,
-                                                const X86Subtarget &Subtarget) {
+/// If a vector select has an operand that is -1 or 0, try to simplify the
+/// select to a bitwise logic operation.
+static SDValue
+combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG,
+                                 TargetLowering::DAGCombinerInfo &DCI,
+                                 const X86Subtarget &Subtarget) {
   SDValue Cond = N->getOperand(0);
   SDValue LHS = N->getOperand(1);
   SDValue RHS = N->getOperand(2);
@@ -28888,18 +28890,28 @@ static SDValue combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG,
     }
   }
 
-  if (!TValIsAllOnes && !FValIsAllZeros)
+  // vselect Cond, 111..., 000... -> Cond
+  if (TValIsAllOnes && FValIsAllZeros)
+    return DAG.getBitcast(VT, Cond);
+
+  if (!DCI.isBeforeLegalize() && !TLI.isTypeLegal(CondVT))
     return SDValue();
 
-  SDValue Ret;
-  if (TValIsAllOnes && FValIsAllZeros)
-    Ret = Cond;
-  else if (TValIsAllOnes)
-    Ret = DAG.getNode(ISD::OR, DL, CondVT, Cond, DAG.getBitcast(CondVT, RHS));
-  else if (FValIsAllZeros)
-    Ret = DAG.getNode(ISD::AND, DL, CondVT, Cond, DAG.getBitcast(CondVT, LHS));
+  // vselect Cond, 111..., X -> or Cond, X
+  if (TValIsAllOnes) {
+    SDValue CastRHS = DAG.getBitcast(CondVT, RHS);
+    SDValue Or = DAG.getNode(ISD::OR, DL, CondVT, Cond, CastRHS);
+    return DAG.getBitcast(VT, Or);
+  }
 
-  return DAG.getBitcast(VT, Ret);
+  // vselect Cond, X, 000... -> and Cond, X
+  if (FValIsAllZeros) {
+    SDValue CastLHS = DAG.getBitcast(CondVT, LHS);
+    SDValue And = DAG.getNode(ISD::AND, DL, CondVT, Cond, CastLHS);
+    return DAG.getBitcast(VT, And);
+  }
+
+  return SDValue();
 }
 
 static SDValue combineSelectOfTwoConstants(SDNode *N, SelectionDAG &DAG) {
@@ -29404,7 +29416,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
     }
   }
 
-  if (SDValue V = combineVSelectWithAllOnesOrZeros(N, DAG, Subtarget))
+  if (SDValue V = combineVSelectWithAllOnesOrZeros(N, DAG, DCI, Subtarget))
     return V;
 
   // If this is a *dynamic* select (non-constant condition) and we can match
diff --git a/test/CodeGen/X86/sse1.ll b/test/CodeGen/X86/sse1.ll
index beedb1d2465..9488d6d2605 100644
--- a/test/CodeGen/X86/sse1.ll
+++ b/test/CodeGen/X86/sse1.ll
@@ -215,3 +215,136 @@ define <4 x i32> @PR30512(<4 x i32> %x, <4 x i32> %y) nounwind {
   ret <4 x i32> %zext
 }
 
+; Fragile test warning - we need to induce the generation of a vselect
+; post-legalization to cause the crash seen in:
+; https://llvm.org/bugs/show_bug.cgi?id=31672
+; Is there a way to do that without an unsafe/fast sqrt intrinsic call?
+; Also, although the goal for adding this test is to prove that we
+; don't crash, I have no idea what this code is doing, so I'm keeping
+; the full codegen checks in case there's motivation to improve this.
+
+define <2 x float> @PR31672() #0 {
+; X32-LABEL: PR31672:
+; X32: # BB#0:
+; X32-NEXT: pushl %ebp
+; X32-NEXT: movl %esp, %ebp
+; X32-NEXT: andl $-16, %esp
+; X32-NEXT: subl $80, %esp
+; X32-NEXT: xorps %xmm0, %xmm0
+; X32-NEXT: movaps {{.*#+}} xmm1 = <42,3,u,u>
+; X32-NEXT: movaps %xmm1, %xmm2
+; X32-NEXT: cmpeqps %xmm0, %xmm2
+; X32-NEXT: movaps %xmm2, {{[0-9]+}}(%esp)
+; X32-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
+; X32-NEXT: rsqrtps %xmm1, %xmm0
+; X32-NEXT: mulps %xmm0, %xmm1
+; X32-NEXT: mulps %xmm0, %xmm1
+; X32-NEXT: addps {{\.LCPI.*}}, %xmm1
+; X32-NEXT: mulps {{\.LCPI.*}}, %xmm0
+; X32-NEXT: mulps %xmm1, %xmm0
+; X32-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: andl %eax, %ecx
+; X32-NEXT: notl %eax
+; X32-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: orl %ecx, %eax
+; X32-NEXT: movl %eax, (%esp)
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT: andl %ecx, %edx
+; X32-NEXT: notl %ecx
+; X32-NEXT: andl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: orl %edx, %ecx
+; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT: andl %ecx, %edx
+; X32-NEXT: notl %ecx
+; X32-NEXT: andl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: orl %edx, %ecx
+; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: andl %eax, %ecx
+; X32-NEXT: notl %eax
+; X32-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: orl %ecx, %eax
+; X32-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X32-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X32-NEXT: movl %ebp, %esp
+; X32-NEXT: popl %ebp
+; X32-NEXT: retl
+;
+; X64-LABEL: PR31672:
+; X64: # BB#0:
+; X64-NEXT: xorps %xmm0, %xmm0
+; X64-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
+; X64-NEXT: movaps {{.*#+}} xmm1 = <42,3,u,u>
+; X64-NEXT: cmpeqps %xmm1, %xmm0
+; X64-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
+; X64-NEXT: rsqrtps %xmm1, %xmm0
+; X64-NEXT: mulps %xmm0, %xmm1
+; X64-NEXT: mulps %xmm0, %xmm1
+; X64-NEXT: addps {{.*}}(%rip), %xmm1
+; X64-NEXT: mulps {{.*}}(%rip), %xmm0
+; X64-NEXT: mulps %xmm1, %xmm0
+; X64-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
+; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r8
+; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r9
+; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r10
+; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdi
+; X64-NEXT: movl %r9d, %esi
+; X64-NEXT: andl %edi, %esi
+; X64-NEXT: movl %edi, %ecx
+; X64-NEXT: notl %ecx
+; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdx
+; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax
+; X64-NEXT: andl %eax, %ecx
+; X64-NEXT: orl %esi, %ecx
+; X64-NEXT: movl %ecx, -{{[0-9]+}}(%rsp)
+; X64-NEXT: movl %r8d, %ecx
+; X64-NEXT: andl %r10d, %ecx
+; X64-NEXT: movl %r10d, %esi
+; X64-NEXT: notl %esi
+; X64-NEXT: andl %edx, %esi
+; X64-NEXT: orl %ecx, %esi
+; X64-NEXT: movl %esi, -{{[0-9]+}}(%rsp)
+; X64-NEXT: shrq $32, %r9
+; X64-NEXT: shrq $32, %rdi
+; X64-NEXT: andl %edi, %r9d
+; X64-NEXT: notl %edi
+; X64-NEXT: shrq $32, %rax
+; X64-NEXT: andl %edi, %eax
+; X64-NEXT: orl %r9d, %eax
+; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
+; X64-NEXT: shrq $32, %r8
+; X64-NEXT: shrq $32, %r10
+; X64-NEXT: andl %r10d, %r8d
+; X64-NEXT: notl %r10d
+; X64-NEXT: shrq $32, %rdx
+; X64-NEXT: andl %r10d, %edx
+; X64-NEXT: orl %r8d, %edx
+; X64-NEXT: movl %edx, -{{[0-9]+}}(%rsp)
+; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X64-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X64-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X64-NEXT: retq
+  %t0 = call fast <2 x float> @llvm.sqrt.v2f32(<2 x float> <float 42.0, float 3.0>)
+  ret <2 x float> %t0
+}
+
+declare <2 x float> @llvm.sqrt.v2f32(<2 x float>) #1
+
+attributes #0 = { nounwind "unsafe-fp-math"="true" }
+attributes #1 = { nounwind readnone }
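
For readers unfamiliar with the combine being guarded here: on a type that is
already legal, combineVSelectWithAllOnesOrZeros rewrites a vector select whose
true operand is all-ones into an OR with the condition mask, and one whose
false operand is all-zeros into an AND. A minimal IR sketch of the two patterns
follows; the function names are hypothetical and this is not part of the
commit, just an illustration of the folds the C++ comments describe:

; select Cond, -1, X -> or Cond, X  (true operand all-ones)
define <4 x i32> @sel_allones(<4 x i32> %a, <4 x i32> %x) {
  %cond = icmp eq <4 x i32> %a, zeroinitializer
  %sel = select <4 x i1> %cond, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %x
  ret <4 x i32> %sel
}

; select Cond, X, 0 -> and Cond, X  (false operand all-zeros)
define <4 x i32> @sel_allzeros(<4 x i32> %a, <4 x i32> %x) {
  %cond = icmp eq <4 x i32> %a, zeroinitializer
  %sel = select <4 x i1> %cond, <4 x i32> %x, <4 x i32> zeroinitializer
  ret <4 x i32> %sel
}

On an SSE2-capable target, each select above should lower to the compare plus
a single bitwise op. The PR31672 test hits the same combine, but only after
legalization has expanded the fast v2f32 sqrt via the rsqrt estimate; on an
SSE1-only target the integer condition type is not legal at that point, so
without the new DCI.isBeforeLegalize()/TLI.isTypeLegal(CondVT) bail-out the
combine created an illegal logic node and crashed. (The sse1.ll RUN lines are
not shown in this hunk; presumably they restrict the target to +sse,-sse2.)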