From: Hans Wennborg Date: Thu, 18 Jan 2018 11:37:05 +0000 (+0000) Subject: Merging r322644: X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=aa06dbe86af4ac06759ceaede59813f2606770f3;p=llvm Merging r322644: ------------------------------------------------------------------------ r322644 | d0k | 2018-01-17 05:01:06 -0800 (Wed, 17 Jan 2018) | 7 lines [X86] Don't mutate shuffle arguments after early-out for AVX512 The match* functions have the annoying behavior of modifying their inputs. Save and restore the inputs, just in case the early out for AVX512 is hit. This is still not great and it's only a matter of time before this kind of bug happens again, but I couldn't come up with a better pattern without rewriting significant chunks of this code. Fixes PR35977. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_60@322840 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 8da0cd6df33..f053d3891eb 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -28669,13 +28669,14 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, } } + SDValue NewV1 = V1; // Save operand in case early exit happens. if (matchUnaryVectorShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain, - V1, DL, DAG, Subtarget, Shuffle, ShuffleSrcVT, - ShuffleVT) && + NewV1, DL, DAG, Subtarget, Shuffle, + ShuffleSrcVT, ShuffleVT) && (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) { if (Depth == 1 && Root.getOpcode() == Shuffle) return SDValue(); // Nothing to do! 
- Res = DAG.getBitcast(ShuffleSrcVT, V1); + Res = DAG.getBitcast(ShuffleSrcVT, NewV1); DCI.AddToWorklist(Res.getNode()); Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res); DCI.AddToWorklist(Res.getNode()); @@ -28697,33 +28698,36 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, } } + SDValue NewV1 = V1; // Save operands in case early exit happens. + SDValue NewV2 = V2; if (matchBinaryVectorShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain, - V1, V2, DL, DAG, Subtarget, Shuffle, + NewV1, NewV2, DL, DAG, Subtarget, Shuffle, ShuffleSrcVT, ShuffleVT, UnaryShuffle) && (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) { if (Depth == 1 && Root.getOpcode() == Shuffle) return SDValue(); // Nothing to do! - V1 = DAG.getBitcast(ShuffleSrcVT, V1); - DCI.AddToWorklist(V1.getNode()); - V2 = DAG.getBitcast(ShuffleSrcVT, V2); - DCI.AddToWorklist(V2.getNode()); - Res = DAG.getNode(Shuffle, DL, ShuffleVT, V1, V2); + NewV1 = DAG.getBitcast(ShuffleSrcVT, NewV1); + DCI.AddToWorklist(NewV1.getNode()); + NewV2 = DAG.getBitcast(ShuffleSrcVT, NewV2); + DCI.AddToWorklist(NewV2.getNode()); + Res = DAG.getNode(Shuffle, DL, ShuffleVT, NewV1, NewV2); DCI.AddToWorklist(Res.getNode()); return DAG.getBitcast(RootVT, Res); } - if (matchBinaryPermuteVectorShuffle(MaskVT, Mask, Zeroable, AllowFloatDomain, - AllowIntDomain, V1, V2, DL, DAG, - Subtarget, Shuffle, ShuffleVT, - PermuteImm) && + NewV1 = V1; // Save operands in case early exit happens. + NewV2 = V2; if (matchBinaryPermuteVectorShuffle( + MaskVT, Mask, Zeroable, AllowFloatDomain, AllowIntDomain, NewV1, + NewV2, DL, DAG, Subtarget, Shuffle, ShuffleVT, PermuteImm) && (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) { if (Depth == 1 && Root.getOpcode() == Shuffle) return SDValue(); // Nothing to do! 
- V1 = DAG.getBitcast(ShuffleVT, V1); - DCI.AddToWorklist(V1.getNode()); - V2 = DAG.getBitcast(ShuffleVT, V2); - DCI.AddToWorklist(V2.getNode()); - Res = DAG.getNode(Shuffle, DL, ShuffleVT, V1, V2, + NewV1 = DAG.getBitcast(ShuffleVT, NewV1); + DCI.AddToWorklist(NewV1.getNode()); + NewV2 = DAG.getBitcast(ShuffleVT, NewV2); + DCI.AddToWorklist(NewV2.getNode()); + Res = DAG.getNode(Shuffle, DL, ShuffleVT, NewV1, NewV2, DAG.getConstant(PermuteImm, DL, MVT::i8)); DCI.AddToWorklist(Res.getNode()); return DAG.getBitcast(RootVT, Res); diff --git a/test/CodeGen/X86/avx512-shuffles/partial_permute.ll b/test/CodeGen/X86/avx512-shuffles/partial_permute.ll index 333efb04913..1a483355319 100644 --- a/test/CodeGen/X86/avx512-shuffles/partial_permute.ll +++ b/test/CodeGen/X86/avx512-shuffles/partial_permute.ll @@ -4780,3 +4780,42 @@ define <2 x double> @test_masked_z_8xdouble_to_2xdouble_perm_mem_mask1(<8 x doub ret <2 x double> %res } +; PR35977 +define void @test_zext_v8i8_to_v8i16(<8 x i8>* %arg, <8 x i16>* %arg1) { +; CHECK-LABEL: test_zext_v8i8_to_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero +; CHECK-NEXT: vpsllw $8, %xmm0, %xmm0 +; CHECK-NEXT: vmovdqa %xmm0, (%rsi) +; CHECK-NEXT: retq + %tmp = getelementptr <8 x i8>, <8 x i8>* %arg, i32 0 + %tmp2 = load <8 x i8>, <8 x i8>* %tmp + %tmp3 = extractelement <8 x i8> %tmp2, i32 0 + %tmp4 = zext i8 %tmp3 to i16 + %tmp5 = insertelement <8 x i16> undef, i16 %tmp4, i32 0 + %tmp6 = extractelement <8 x i8> %tmp2, i32 1 + %tmp7 = zext i8 %tmp6 to i16 + %tmp8 = insertelement <8 x i16> %tmp5, i16 %tmp7, i32 1 + %tmp9 = extractelement <8 x i8> %tmp2, i32 2 + %tmp10 = zext i8 %tmp9 to i16 + %tmp11 = insertelement <8 x i16> %tmp8, i16 %tmp10, i32 2 + %tmp12 = extractelement <8 x i8> %tmp2, i32 3 + %tmp13 = zext i8 %tmp12 to i16 + %tmp14 = insertelement <8 x i16> %tmp11, i16 %tmp13, i32 3 + %tmp15 = extractelement <8 x i8> 
%tmp2, i32 4 + %tmp16 = zext i8 %tmp15 to i16 + %tmp17 = insertelement <8 x i16> %tmp14, i16 %tmp16, i32 4 + %tmp18 = extractelement <8 x i8> %tmp2, i32 5 + %tmp19 = zext i8 %tmp18 to i16 + %tmp20 = insertelement <8 x i16> %tmp17, i16 %tmp19, i32 5 + %tmp21 = extractelement <8 x i8> %tmp2, i32 6 + %tmp22 = zext i8 %tmp21 to i16 + %tmp23 = insertelement <8 x i16> %tmp20, i16 %tmp22, i32 6 + %tmp24 = extractelement <8 x i8> %tmp2, i32 7 + %tmp25 = zext i8 %tmp24 to i16 + %tmp26 = insertelement <8 x i16> %tmp23, i16 %tmp25, i32 7 + %tmp27 = shl <8 x i16> %tmp26, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> + %tmp28 = getelementptr <8 x i16>, <8 x i16>* %arg1, i32 0 + store <8 x i16> %tmp27, <8 x i16>* %tmp28 + ret void +}