From: Simon Pilgrim Date: Fri, 20 Jan 2017 18:20:30 +0000 (+0000) Subject: [InstCombine][X86] Add MULDQ/MULUDQ undef handling X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=116ba1a31ac0d403c0ed644b63585b88bbdca60a;p=llvm [InstCombine][X86] Add MULDQ/MULUDQ undef handling git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@292627 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index b0a03270f30..2ab07eb42a2 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -510,6 +510,18 @@ static Value *simplifyX86varShift(const IntrinsicInst &II, return Builder.CreateAShr(Vec, ShiftVec); } +static Value *simplifyX86muldq(const IntrinsicInst &II) { + Value *Arg0 = II.getArgOperand(0); + Value *Arg1 = II.getArgOperand(1); + Type *ResTy = II.getType(); + + // muldq/muludq(undef, undef) -> undef + if (isa<UndefValue>(Arg0) && isa<UndefValue>(Arg1)) + return UndefValue::get(ResTy); + + return nullptr; +} + static Value *simplifyX86movmsk(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder) { Value *Arg = II.getArgOperand(0); @@ -2142,6 +2154,9 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_avx2_pmulu_dq: case Intrinsic::x86_avx512_pmul_dq_512: case Intrinsic::x86_avx512_pmulu_dq_512: { + if (Value *V = simplifyX86muldq(*II)) + return replaceInstUsesWith(*II, V); + unsigned VWidth = II->getType()->getVectorNumElements(); APInt UndefElts(VWidth, 0); APInt DemandedElts = APInt::getAllOnesValue(VWidth); diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index fb7177f1ddb..e625f804a23 100644 --- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -1469,6 +1469,12 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, 
APInt DemandedElts, Depth + 1); if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; } + // Output elements are undefined if both are undefined. Consider things + // like undef*0. The result is known zero, not undef. + for (unsigned i = 0; i != VWidth; ++i) + if (UndefElts2[i * 2] && UndefElts3[i * 2]) + UndefElts.setBit(i); + break; } diff --git a/test/Transforms/InstCombine/x86-muldq.ll b/test/Transforms/InstCombine/x86-muldq.ll index a5202221b0d..619e53ebae8 100644 --- a/test/Transforms/InstCombine/x86-muldq.ll +++ b/test/Transforms/InstCombine/x86-muldq.ll @@ -7,8 +7,7 @@ define <2 x i64> @undef_pmuludq_128(<4 x i32> %a0, <4 x i32> %a1) { ; CHECK-LABEL: @undef_pmuludq_128( -; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> undef, <4 x i32> undef) -; CHECK-NEXT: ret <2 x i64> [[TMP1]] +; CHECK-NEXT: ret <2 x i64> undef ; %1 = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> undef, <4 x i32> undef) ret <2 x i64> %1 @@ -16,8 +15,7 @@ define <2 x i64> @undef_pmuludq_128(<4 x i32> %a0, <4 x i32> %a1) { define <4 x i64> @undef_pmuludq_256(<8 x i32> %a0, <8 x i32> %a1) { ; CHECK-LABEL: @undef_pmuludq_256( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> undef, <8 x i32> undef) -; CHECK-NEXT: ret <4 x i64> [[TMP1]] +; CHECK-NEXT: ret <4 x i64> undef ; %1 = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> undef, <8 x i32> undef) ret <4 x i64> %1 @@ -25,8 +23,7 @@ define <4 x i64> @undef_pmuludq_256(<8 x i32> %a0, <8 x i32> %a1) { define <8 x i64> @undef_pmuludq_512(<16 x i32> %a0, <16 x i32> %a1) { ; CHECK-LABEL: @undef_pmuludq_512( -; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> undef, <16 x i32> undef) -; CHECK-NEXT: ret <8 x i64> [[TMP1]] +; CHECK-NEXT: ret <8 x i64> undef ; %1 = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> undef, <16 x i32> undef) ret <8 x i64> %1 @@ -34,8 +31,7 @@ define <8 x i64> @undef_pmuludq_512(<16 x i32> %a0, <16 x i32> %a1) { 
define <2 x i64> @undef_pmuldq_128(<4 x i32> %a0, <4 x i32> %a1) { ; CHECK-LABEL: @undef_pmuldq_128( -; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> undef, <4 x i32> undef) -; CHECK-NEXT: ret <2 x i64> [[TMP1]] +; CHECK-NEXT: ret <2 x i64> undef ; %1 = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> undef, <4 x i32> undef) ret <2 x i64> %1 @@ -43,8 +39,7 @@ define <2 x i64> @undef_pmuldq_128(<4 x i32> %a0, <4 x i32> %a1) { define <4 x i64> @undef_pmuldq_256(<8 x i32> %a0, <8 x i32> %a1) { ; CHECK-LABEL: @undef_pmuldq_256( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> undef, <8 x i32> undef) -; CHECK-NEXT: ret <4 x i64> [[TMP1]] +; CHECK-NEXT: ret <4 x i64> undef ; %1 = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> undef, <8 x i32> undef) ret <4 x i64> %1 @@ -52,13 +47,66 @@ define <4 x i64> @undef_pmuldq_256(<8 x i32> %a0, <8 x i32> %a1) { define <8 x i64> @undef_pmuldq_512(<16 x i32> %a0, <16 x i32> %a1) { ; CHECK-LABEL: @undef_pmuldq_512( -; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> undef, <16 x i32> undef) -; CHECK-NEXT: ret <8 x i64> [[TMP1]] +; CHECK-NEXT: ret <8 x i64> undef ; %1 = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> undef, <16 x i32> undef) ret <8 x i64> %1 } +define <2 x i64> @undef_zero_pmuludq_128(<4 x i32> %a0, <4 x i32> %a1) { +; CHECK-LABEL: @undef_zero_pmuludq_128( +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> undef, <4 x i32> <i32 0, i32 undef, i32 0, i32 undef>) +; CHECK-NEXT: ret <2 x i64> [[TMP1]] +; + %1 = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> undef, <4 x i32> zeroinitializer) + ret <2 x i64> %1 +} + +define <4 x i64> @undef_zero_pmuludq_256(<8 x i32> %a0, <8 x i32> %a1) { +; CHECK-LABEL: @undef_zero_pmuludq_256( +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> <i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef>, <8 x i32> undef) +; CHECK-NEXT: ret <4 x i64> [[TMP1]] +; + %1 = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> 
zeroinitializer, <8 x i32> undef) + ret <4 x i64> %1 +} + +define <8 x i64> @undef_zero_pmuludq_512(<16 x i32> %a0, <16 x i32> %a1) { +; CHECK-LABEL: @undef_zero_pmuludq_512( +; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> undef, <16 x i32> <i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef>) +; CHECK-NEXT: ret <8 x i64> [[TMP1]] +; + %1 = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> undef, <16 x i32> zeroinitializer) + ret <8 x i64> %1 +} + +define <2 x i64> @undef_zero_pmuldq_128(<4 x i32> %a0, <4 x i32> %a1) { +; CHECK-LABEL: @undef_zero_pmuldq_128( +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> <i32 0, i32 undef, i32 0, i32 undef>, <4 x i32> undef) +; CHECK-NEXT: ret <2 x i64> [[TMP1]] +; + %1 = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> zeroinitializer, <4 x i32> undef) + ret <2 x i64> %1 +} + +define <4 x i64> @undef_zero_pmuldq_256(<8 x i32> %a0, <8 x i32> %a1) { +; CHECK-LABEL: @undef_zero_pmuldq_256( +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> undef, <8 x i32> <i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef>) +; CHECK-NEXT: ret <4 x i64> [[TMP1]] +; + %1 = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> undef, <8 x i32> zeroinitializer) + ret <4 x i64> %1 +} + +define <8 x i64> @undef_zero_pmuldq_512(<16 x i32> %a0, <16 x i32> %a1) { +; CHECK-LABEL: @undef_zero_pmuldq_512( +; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> <i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef>, <16 x i32> undef) +; CHECK-NEXT: ret <8 x i64> [[TMP1]] +; + %1 = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> zeroinitializer, <16 x i32> undef) + ret <8 x i64> %1 +} + ; ; PMULUDQ/PMULDQ - only the even elements (0, 2, 4, 6) of the vXi32 inputs are required. ;