From 5241875f53055f0f8093b871ede4fac6a9c07518 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 15 Mar 2017 13:16:46 +0000 Subject: [PATCH] [X86][SSE] Fixed shuffle MOVSS/MOVSD combining of all zeroable inputs. Turns out it can happen, so the assertion was too harsh. Found during fuzz testing. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@297833 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 9 +++++---- test/CodeGen/X86/shuffle-combine-crash-2.ll | 20 ++++++++++++++++++++ 2 files changed, 25 insertions(+), 4 deletions(-) create mode 100644 test/CodeGen/X86/shuffle-combine-crash-2.ll diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 6ab8c4b3275..63a69f54be2 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -28221,19 +28221,20 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG, } case X86ISD::MOVSD: case X86ISD::MOVSS: { - bool isFloat = VT.isFloatingPoint(); SDValue V0 = peekThroughBitcasts(N->getOperand(0)); SDValue V1 = peekThroughBitcasts(N->getOperand(1)); - bool isFloat0 = V0.getSimpleValueType().isFloatingPoint(); - bool isFloat1 = V1.getSimpleValueType().isFloatingPoint(); bool isZero0 = ISD::isBuildVectorAllZeros(V0.getNode()); bool isZero1 = ISD::isBuildVectorAllZeros(V1.getNode()); - assert(!(isZero0 && isZero1) && "Zeroable shuffle detected."); + if (isZero0 && isZero1) + return SDValue(); // We often lower to MOVSD/MOVSS from integer as well as native float // types; remove unnecessary domain-crossing bitcasts if we can to make it // easier to combine shuffles later on. We've already accounted for the // domain switching cost when we decided to lower with it. 
+ bool isFloat = VT.isFloatingPoint(); + bool isFloat0 = V0.getSimpleValueType().isFloatingPoint(); + bool isFloat1 = V1.getSimpleValueType().isFloatingPoint(); if ((isFloat != isFloat0 || isZero0) && (isFloat != isFloat1 || isZero1)) { MVT NewVT = isFloat ? (X86ISD::MOVSD == Opcode ? MVT::v2i64 : MVT::v4i32) : (X86ISD::MOVSD == Opcode ? MVT::v2f64 : MVT::v4f32); diff --git a/test/CodeGen/X86/shuffle-combine-crash-2.ll b/test/CodeGen/X86/shuffle-combine-crash-2.ll new file mode 100644 index 00000000000..ea37d5b4853 --- /dev/null +++ b/test/CodeGen/X86/shuffle-combine-crash-2.ll @@ -0,0 +1,20 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86 +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64 + +define <4 x i64> @fold_movsd_zero() { +; X86-LABEL: fold_movsd_zero: +; X86: # BB#0: +; X86-NEXT: xorps %xmm0, %xmm0 +; X86-NEXT: xorps %xmm1, %xmm1 +; X86-NEXT: retl +; +; X64-LABEL: fold_movsd_zero: +; X64: # BB#0: +; X64-NEXT: xorps %xmm0, %xmm0 +; X64-NEXT: xorps %xmm1, %xmm1 +; X64-NEXT: retq + %insert = insertelement <4 x i64> zeroinitializer, i64 0, i32 0 + %shuffle = shufflevector <4 x i64> %insert, <4 x i64> zeroinitializer, <4 x i32> + ret <4 x i64> %shuffle +} -- 2.50.1