From: Michael Kuperstein
Date: Wed, 18 Jan 2017 23:05:58 +0000 (+0000)
Subject: Revert r291670 because it introduces a crash.
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=57f066878163bf11be142fa73c87d26155751ef6;p=llvm

Revert r291670 because it introduces a crash.

r291670 doesn't crash on the original testcase from PR31589, but it crashes
on a slightly more complex one. PR31589 has the new reproducer.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@292444 91177308-0d34-0410-b5e6-96231b3b80d8
---
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index ee6a14cdb01..b686c42b3f1 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -31335,93 +31335,6 @@ static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG,
   return DAG.getNode(X86ISD::PCMPGT, SDLoc(N), VT, Shift.getOperand(0), Ones);
 }
 
-/// Check if truncation with saturation form type \p SrcVT to \p DstVT
-/// is valid for the given \p Subtarget.
-static bool isSATValidOnAVX512Subtarget(EVT SrcVT, EVT DstVT,
-                                        const X86Subtarget &Subtarget) {
-  if (!Subtarget.hasAVX512())
-    return false;
-
-  // FIXME: Scalar type may be supported if we move it to vector register.
-  if (!SrcVT.isVector() || !SrcVT.isSimple() || SrcVT.getSizeInBits() > 512)
-    return false;
-
-  EVT SrcElVT = SrcVT.getScalarType();
-  EVT DstElVT = DstVT.getScalarType();
-  if (SrcElVT.getSizeInBits() < 16 || SrcElVT.getSizeInBits() > 64)
-    return false;
-  if (DstElVT.getSizeInBits() < 8 || DstElVT.getSizeInBits() > 32)
-    return false;
-  if (SrcVT.is512BitVector() || Subtarget.hasVLX())
-    return SrcElVT.getSizeInBits() >= 32 || Subtarget.hasBWI();
-  return false;
-}
-
-/// Return true if VPACK* instruction can be used for the given types
-/// and it is avalable on \p Subtarget.
-static bool
-isSATValidOnSSESubtarget(EVT SrcVT, EVT DstVT, const X86Subtarget &Subtarget) {
-  if (Subtarget.hasSSE2())
-    // v16i16 -> v16i8
-    if (SrcVT == MVT::v16i16 && DstVT == MVT::v16i8)
-      return true;
-  if (Subtarget.hasSSE41())
-    // v8i32 -> v8i16
-    if (SrcVT == MVT::v8i32 && DstVT == MVT::v8i16)
-      return true;
-  return false;
-}
-
-/// Detect a pattern of truncation with saturation:
-/// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type).
-/// Return the source value to be truncated or SDValue() if the pattern was not
-/// matched.
-static SDValue detectUSatPattern(SDValue In, EVT VT) {
-  if (In.getOpcode() != ISD::UMIN)
-    return SDValue();
-
-  //Saturation with truncation. We truncate from InVT to VT.
-  assert(In.getScalarValueSizeInBits() > VT.getScalarSizeInBits() &&
-    "Unexpected types for truncate operation");
-
-  APInt C;
-  if (ISD::isConstantSplatVector(In.getOperand(1).getNode(), C)) {
-    // C should be equal to UINT32_MAX / UINT16_MAX / UINT8_MAX according
-    // the element size of the destination type.
-    return APIntOps::isMask(VT.getScalarSizeInBits(), C) ? In.getOperand(0) :
-      SDValue();
-  }
-  return SDValue();
-}
-
-/// Detect a pattern of truncation with saturation:
-/// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type).
-/// The types should allow to use VPMOVUS* instruction on AVX512.
-/// Return the source value to be truncated or SDValue() if the pattern was not
-/// matched.
-static SDValue detectAVX512USatPattern(SDValue In, EVT VT,
-                                       const X86Subtarget &Subtarget) {
-  if (!isSATValidOnAVX512Subtarget(In.getValueType(), VT, Subtarget))
-    return SDValue();
-  return detectUSatPattern(In, VT);
-}
-
-static SDValue
-combineTruncateWithUSat(SDValue In, EVT VT, SDLoc &DL, SelectionDAG &DAG,
-                        const X86Subtarget &Subtarget) {
-  SDValue USatVal = detectUSatPattern(In, VT);
-  if (USatVal) {
-    if (isSATValidOnAVX512Subtarget(In.getValueType(), VT, Subtarget))
-      return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, USatVal);
-    if (isSATValidOnSSESubtarget(In.getValueType(), VT, Subtarget)) {
-      SDValue Lo, Hi;
-      std::tie(Lo, Hi) = DAG.SplitVector(USatVal, DL);
-      return DAG.getNode(X86ISD::PACKUS, DL, VT, Lo, Hi);
-    }
-  }
-  return SDValue();
-}
-
 /// This function detects the AVG pattern between vectors of unsigned i8/i16,
 /// which is c = (a + b + 1) / 2, and replace this operation with the efficient
 /// X86ISD::AVG instruction.
@@ -31988,12 +31901,6 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
                           St->getPointerInfo(), St->getAlignment(),
                           St->getMemOperand()->getFlags());
 
-    if (SDValue Val =
-        detectAVX512USatPattern(St->getValue(), St->getMemoryVT(), Subtarget))
-      return EmitTruncSStore(false /* Unsigned saturation */, St->getChain(),
-                             dl, Val, St->getBasePtr(),
-                             St->getMemoryVT(), St->getMemOperand(), DAG);
-
     const TargetLowering &TLI = DAG.getTargetLoweringInfo();
     unsigned NumElems = VT.getVectorNumElements();
     assert(StVT != VT && "Cannot truncate to the same type");
@@ -32614,10 +32521,6 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
   if (SDValue Avg = detectAVGPattern(Src, VT, DAG, Subtarget, DL))
     return Avg;
 
-  // Try to combine truncation with unsigned saturation.
-  if (SDValue Val = combineTruncateWithUSat(Src, VT, DL, DAG, Subtarget))
-    return Val;
-
   // The bitcast source is a direct mmx result.
   // Detect bitcasts between i32 to x86mmx
   if (Src.getOpcode() == ISD::BITCAST && VT == MVT::i32) {
diff --git a/test/CodeGen/X86/avx-trunc.ll b/test/CodeGen/X86/avx-trunc.ll
index c729b988cfb..789ca241394 100644
--- a/test/CodeGen/X86/avx-trunc.ll
+++ b/test/CodeGen/X86/avx-trunc.ll
@@ -39,29 +39,3 @@ define <16 x i8> @trunc_16_8(<16 x i16> %A) nounwind uwtable readnone ssp{
   %B = trunc <16 x i16> %A to <16 x i8>
   ret <16 x i8> %B
 }
-
-define <16 x i8> @usat_trunc_wb_256(<16 x i16> %i) {
-; CHECK-LABEL: usat_trunc_wb_256:
-; CHECK:       # BB#0:
-; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; CHECK-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
-; CHECK-NEXT:    vzeroupper
-; CHECK-NEXT:    retq
-  %x3 = icmp ult <16 x i16> %i,
-  %x5 = select <16 x i1> %x3, <16 x i16> %i, <16 x i16>
-  %x6 = trunc <16 x i16> %x5 to <16 x i8>
-  ret <16 x i8> %x6
-}
-
-define <8 x i16> @usat_trunc_dw_256(<8 x i32> %i) {
-; CHECK-LABEL: usat_trunc_dw_256:
-; CHECK:       # BB#0:
-; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; CHECK-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
-; CHECK-NEXT:    vzeroupper
-; CHECK-NEXT:    retq
-  %x3 = icmp ult <8 x i32> %i,
-  %x5 = select <8 x i1> %x3, <8 x i32> %i, <8 x i32>
-  %x6 = trunc <8 x i32> %x5 to <8 x i16>
-  ret <8 x i16> %x6
-}
diff --git a/test/CodeGen/X86/avx512-trunc.ll b/test/CodeGen/X86/avx512-trunc.ll
index fb6c55b26e7..646697b82c2 100644
--- a/test/CodeGen/X86/avx512-trunc.ll
+++ b/test/CodeGen/X86/avx512-trunc.ll
@@ -500,208 +500,3 @@ define void @trunc_wb_128_mem(<8 x i16> %i, <8 x i8>* %res) #0 {
   store <8 x i8> %x, <8 x i8>* %res
   ret void
 }
-
-
-define void @usat_trunc_wb_256_mem(<16 x i16> %i, <16 x i8>* %res) {
-; KNL-LABEL: usat_trunc_wb_256_mem:
-; KNL:       ## BB#0:
-; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; KNL-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
-; KNL-NEXT:    vmovdqu %xmm0, (%rdi)
-; KNL-NEXT:    retq
-;
-; SKX-LABEL: usat_trunc_wb_256_mem:
-; SKX:       ## BB#0:
-; SKX-NEXT:    vpmovuswb %ymm0, (%rdi)
-; SKX-NEXT:    retq
-  %x3 = icmp ult <16 x i16> %i,
-  %x5 = select <16 x i1> %x3, <16 x i16> %i, <16 x i16>
-  %x6 = trunc <16 x i16> %x5 to <16 x i8>
-  store <16 x i8> %x6, <16 x i8>* %res, align 1
-  ret void
-}
-
-define <16 x i8> @usat_trunc_wb_256(<16 x i16> %i) {
-; KNL-LABEL: usat_trunc_wb_256:
-; KNL:       ## BB#0:
-; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; KNL-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
-; KNL-NEXT:    retq
-;
-; SKX-LABEL: usat_trunc_wb_256:
-; SKX:       ## BB#0:
-; SKX-NEXT:    vpmovuswb %ymm0, %xmm0
-; SKX-NEXT:    retq
-  %x3 = icmp ult <16 x i16> %i,
-  %x5 = select <16 x i1> %x3, <16 x i16> %i, <16 x i16>
-  %x6 = trunc <16 x i16> %x5 to <16 x i8>
-  ret <16 x i8> %x6
-}
-
-define void @usat_trunc_wb_128_mem(<8 x i16> %i, <8 x i8>* %res) {
-; KNL-LABEL: usat_trunc_wb_128_mem:
-; KNL:       ## BB#0:
-; KNL-NEXT:    vpminuw {{.*}}(%rip), %xmm0, %xmm0
-; KNL-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
-; KNL-NEXT:    vmovq %xmm0, (%rdi)
-; KNL-NEXT:    retq
-;
-; SKX-LABEL: usat_trunc_wb_128_mem:
-; SKX:       ## BB#0:
-; SKX-NEXT:    vpmovuswb %xmm0, (%rdi)
-; SKX-NEXT:    retq
-  %x3 = icmp ult <8 x i16> %i,
-  %x5 = select <8 x i1> %x3, <8 x i16> %i, <8 x i16>
-  %x6 = trunc <8 x i16> %x5 to <8 x i8>
-  store <8 x i8> %x6, <8 x i8>* %res, align 1
-  ret void
-}
-
-define void @usat_trunc_db_512_mem(<16 x i32> %i, <16 x i8>* %res) {
-; ALL-LABEL: usat_trunc_db_512_mem:
-; ALL:       ## BB#0:
-; ALL-NEXT:    vpmovusdb %zmm0, (%rdi)
-; ALL-NEXT:    retq
-  %x3 = icmp ult <16 x i32> %i,
-  %x5 = select <16 x i1> %x3, <16 x i32> %i, <16 x i32>
-  %x6 = trunc <16 x i32> %x5 to <16 x i8>
-  store <16 x i8> %x6, <16 x i8>* %res, align 1
-  ret void
-}
-
-define void @usat_trunc_qb_512_mem(<8 x i64> %i, <8 x i8>* %res) {
-; ALL-LABEL: usat_trunc_qb_512_mem:
-; ALL:       ## BB#0:
-; ALL-NEXT:    vpmovusqb %zmm0, (%rdi)
-; ALL-NEXT:    retq
-  %x3 = icmp ult <8 x i64> %i,
-  %x5 = select <8 x i1> %x3, <8 x i64> %i, <8 x i64>
-  %x6 = trunc <8 x i64> %x5 to <8 x i8>
-  store <8 x i8> %x6, <8 x i8>* %res, align 1
-  ret void
-}
-
-define void @usat_trunc_qd_512_mem(<8 x i64> %i, <8 x i32>* %res) {
-; ALL-LABEL: usat_trunc_qd_512_mem:
-; ALL:       ## BB#0:
-; ALL-NEXT:    vpmovusqd %zmm0, (%rdi)
-; ALL-NEXT:    retq
-  %x3 = icmp ult <8 x i64> %i,
-  %x5 = select <8 x i1> %x3, <8 x i64> %i, <8 x i64>
-  %x6 = trunc <8 x i64> %x5 to <8 x i32>
-  store <8 x i32> %x6, <8 x i32>* %res, align 1
-  ret void
-}
-
-define void @usat_trunc_qw_512_mem(<8 x i64> %i, <8 x i16>* %res) {
-; ALL-LABEL: usat_trunc_qw_512_mem:
-; ALL:       ## BB#0:
-; ALL-NEXT:    vpmovusqw %zmm0, (%rdi)
-; ALL-NEXT:    retq
-  %x3 = icmp ult <8 x i64> %i,
-  %x5 = select <8 x i1> %x3, <8 x i64> %i, <8 x i64>
-  %x6 = trunc <8 x i64> %x5 to <8 x i16>
-  store <8 x i16> %x6, <8 x i16>* %res, align 1
-  ret void
-}
-
-define <32 x i8> @usat_trunc_db_1024(<32 x i32> %i) {
-; KNL-LABEL: usat_trunc_db_1024:
-; KNL:       ## BB#0:
-; KNL-NEXT:    vpmovusdb %zmm0, %xmm0
-; KNL-NEXT:    vpmovusdb %zmm1, %xmm1
-; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
-; KNL-NEXT:    retq
-;
-; SKX-LABEL: usat_trunc_db_1024:
-; SKX:       ## BB#0:
-; SKX-NEXT:    vpbroadcastd {{.*}}(%rip), %zmm2
-; SKX-NEXT:    vpminud %zmm2, %zmm1, %zmm1
-; SKX-NEXT:    vpminud %zmm2, %zmm0, %zmm0
-; SKX-NEXT:    vpmovdw %zmm0, %ymm0
-; SKX-NEXT:    vpmovdw %zmm1, %ymm1
-; SKX-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; SKX-NEXT:    vpmovwb %zmm0, %ymm0
-; SKX-NEXT:    retq
-  %x3 = icmp ult <32 x i32> %i,
-  %x5 = select <32 x i1> %x3, <32 x i32> %i, <32 x i32>
-  %x6 = trunc <32 x i32> %x5 to <32 x i8>
-  ret <32 x i8> %x6
-}
-
-define void @usat_trunc_db_1024_mem(<32 x i32> %i, <32 x i8>* %p) {
-; KNL-LABEL: usat_trunc_db_1024_mem:
-; KNL:       ## BB#0:
-; KNL-NEXT:    vpmovusdb %zmm0, %xmm0
-; KNL-NEXT:    vpmovusdb %zmm1, %xmm1
-; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
-; KNL-NEXT:    vmovdqu %ymm0, (%rdi)
-; KNL-NEXT:    retq
-;
-; SKX-LABEL: usat_trunc_db_1024_mem:
-; SKX:       ## BB#0:
-; SKX-NEXT:    vpbroadcastd {{.*}}(%rip), %zmm2
-; SKX-NEXT:    vpminud %zmm2, %zmm1, %zmm1
-; SKX-NEXT:    vpminud %zmm2, %zmm0, %zmm0
-; SKX-NEXT:    vpmovdw %zmm0, %ymm0
-; SKX-NEXT:    vpmovdw %zmm1, %ymm1
-; SKX-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; SKX-NEXT:    vpmovwb %zmm0, (%rdi)
-; SKX-NEXT:    retq
-  %x3 = icmp ult <32 x i32> %i,
-  %x5 = select <32 x i1> %x3, <32 x i32> %i, <32 x i32>
-  %x6 = trunc <32 x i32> %x5 to <32 x i8>
-  store <32 x i8>%x6, <32 x i8>* %p, align 1
-  ret void
-}
-
-define <16 x i16> @usat_trunc_dw_512(<16 x i32> %i) {
-; ALL-LABEL: usat_trunc_dw_512:
-; ALL:       ## BB#0:
-; ALL-NEXT:    vpmovusdw %zmm0, %ymm0
-; ALL-NEXT:    retq
-  %x3 = icmp ult <16 x i32> %i,
-  %x5 = select <16 x i1> %x3, <16 x i32> %i, <16 x i32>
-  %x6 = trunc <16 x i32> %x5 to <16 x i16>
-  ret <16 x i16> %x6
-}
-
-define <8 x i8> @usat_trunc_wb_128(<8 x i16> %i) {
-; ALL-LABEL: usat_trunc_wb_128:
-; ALL:       ## BB#0:
-; ALL-NEXT:    vpminuw {{.*}}(%rip), %xmm0, %xmm0
-; ALL-NEXT:    retq
-  %x3 = icmp ult <8 x i16> %i,
-  %x5 = select <8 x i1> %x3, <8 x i16> %i, <8 x i16>
-  %x6 = trunc <8 x i16> %x5 to <8 x i8>
-  ret <8 x i8>%x6
-}
-
-define <16 x i16> @usat_trunc_qw_1024(<16 x i64> %i) {
-; KNL-LABEL: usat_trunc_qw_1024:
-; KNL:       ## BB#0:
-; KNL-NEXT:    vpbroadcastq {{.*}}(%rip), %zmm2
-; KNL-NEXT:    vpminuq %zmm2, %zmm1, %zmm1
-; KNL-NEXT:    vpminuq %zmm2, %zmm0, %zmm0
-; KNL-NEXT:    vpmovqd %zmm0, %ymm0
-; KNL-NEXT:    vpmovqd %zmm1, %ymm1
-; KNL-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; KNL-NEXT:    vpmovdw %zmm0, %ymm0
-; KNL-NEXT:    retq
-;
-; SKX-LABEL: usat_trunc_qw_1024:
-; SKX:       ## BB#0:
-; SKX-NEXT:    vpbroadcastq {{.*}}(%rip), %zmm2
-; SKX-NEXT:    vpminuq %zmm2, %zmm1, %zmm1
-; SKX-NEXT:    vpminuq %zmm2, %zmm0, %zmm0
-; SKX-NEXT:    vpmovqd %zmm0, %ymm0
-; SKX-NEXT:    vpmovqd %zmm1, %ymm1
-; SKX-NEXT:    vinserti32x8 $1, %ymm1, %zmm0, %zmm0
-; SKX-NEXT:    vpmovdw %zmm0, %ymm0
-; SKX-NEXT:    retq
-  %x3 = icmp ult <16 x i64> %i,
-  %x5 = select <16 x i1> %x3, <16 x i64> %i, <16 x i64>
-  %x6 = trunc <16 x i64> %x5 to <16 x i16>
-  ret <16 x i16> %x6
-}
-
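
For reference, the IR shape that the reverted combine matched is, per the removed comment, (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type); the reverted tests express the umin as an icmp ult plus select against a splat of the destination element type's unsigned maximum (255 for i8, 65535 for i16, 4294967295 for i32). Below is a minimal, self-contained sketch of that shape. It is not part of the patch: the function name is illustrative, and the splat constant is spelled out from that rule rather than copied from the patch text.

; Sketch only: umin written as icmp ult + select, followed by a truncate.
; With r291670 applied, the SKX run of the reverted usat_trunc_wb_256 test
; expected this shape to lower to a single vpmovuswb.
define <16 x i8> @usat_trunc_wb_256_sketch(<16 x i16> %i) {
  %cmp = icmp ult <16 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %min = select <16 x i1> %cmp, <16 x i16> %i, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %t = trunc <16 x i16> %min to <16 x i8>
  ret <16 x i8> %t
}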