From: Simon Pilgrim
Date: Mon, 23 Jan 2017 10:57:39 +0000 (+0000)
Subject: [InstCombine][SSE] Tests showing missed opportunities to constant fold PMULDQ/PMULUDQ
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=70a34a86e6752d2a0d9af6d50b4b8692ef88613e;p=llvm

[InstCombine][SSE] Tests showing missed opportunities to constant fold PMULDQ/PMULUDQ

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@292782 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/test/Transforms/InstCombine/x86-muldq.ll b/test/Transforms/InstCombine/x86-muldq.ll
index 619e53ebae8..28c9d021630 100644
--- a/test/Transforms/InstCombine/x86-muldq.ll
+++ b/test/Transforms/InstCombine/x86-muldq.ll
@@ -107,6 +107,64 @@ define <8 x i64> @undef_zero_pmuldq_512(<16 x i32> %a0, <16 x i32> %a1) {
   ret <8 x i64> %1
 }
 
+;
+; Constant Folding
+;
+
+define <2 x i64> @fold_pmuludq_128(<4 x i32> %a0, <4 x i32> %a1) {
+; CHECK-LABEL: @fold_pmuludq_128(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> , <4 x i32> )
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+;
+  %1 = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> , <4 x i32> )
+  ret <2 x i64> %1
+}
+
+define <4 x i64> @fold_pmuludq_256(<8 x i32> %a0, <8 x i32> %a1) {
+; CHECK-LABEL: @fold_pmuludq_256(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> , <8 x i32> )
+; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
+;
+  %1 = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> zeroinitializer, <8 x i32> zeroinitializer)
+  ret <4 x i64> %1
+}
+
+define <8 x i64> @fold_pmuludq_512(<16 x i32> %a0, <16 x i32> %a1) {
+; CHECK-LABEL: @fold_pmuludq_512(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> , <16 x i32> )
+; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
+;
+  %1 = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> , <16 x i32> )
+  ret <8 x i64> %1
+}
+
+define <2 x i64> @fold_pmuldq_128(<4 x i32> %a0, <4 x i32> %a1) {
+; CHECK-LABEL: @fold_pmuldq_128(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> , <4 x i32> )
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+;
+  %1 = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> , <4 x i32> )
+  ret <2 x i64> %1
+}
+
+define <4 x i64> @fold_pmuldq_256(<8 x i32> %a0, <8 x i32> %a1) {
+; CHECK-LABEL: @fold_pmuldq_256(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> , <8 x i32> )
+; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
+;
+  %1 = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> , <8 x i32> )
+  ret <4 x i64> %1
+}
+
+define <8 x i64> @fold_pmuldq_512(<16 x i32> %a0, <16 x i32> %a1) {
+; CHECK-LABEL: @fold_pmuldq_512(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> , <16 x i32> )
+; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
+;
+  %1 = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> zeroinitializer, <16 x i32> )
+  ret <8 x i64> %1
+}
+
 ;
 ; PMULUDQ/PMULDQ - only the even elements (0, 2, 4, 6) of the vXi32 inputs are required.
 ;
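
For context, the semantics these tests exercise: PMULUDQ (SSE2) multiplies the
even (0, 2, 4, ...) 32-bit lanes of its two inputs zero-extended to 64 bits,
and PMULDQ (SSE4.1) does the same with sign extension, so a call whose
operands are all constants could be evaluated at compile time. Below is a
minimal standalone sketch of the 128-bit case in C++ (not LLVM's InstCombine
code; the helper names pmuludq128/pmuldq128 and the sample inputs are invented
for illustration), showing the arithmetic a constant folder would perform.

#include <array>
#include <cstdint>
#include <cstdio>

// PMULUDQ, 128-bit form: zero-extend lanes 0 and 2 and multiply into two
// unsigned 64-bit result lanes. The odd input lanes are never read.
static std::array<uint64_t, 2> pmuludq128(const std::array<uint32_t, 4> &a,
                                          const std::array<uint32_t, 4> &b) {
  return {static_cast<uint64_t>(a[0]) * b[0],
          static_cast<uint64_t>(a[2]) * b[2]};
}

// PMULDQ, 128-bit form: identical lane selection, but the 32-bit lanes are
// sign-extended before the 64-bit multiply.
static std::array<int64_t, 2> pmuldq128(const std::array<uint32_t, 4> &a,
                                        const std::array<uint32_t, 4> &b) {
  auto sx = [](uint32_t v) {
    return static_cast<int64_t>(static_cast<int32_t>(v));
  };
  return {sx(a[0]) * sx(b[0]), sx(a[2]) * sx(b[2])};
}

int main() {
  // Only lanes 0 and 2 feed the result; lanes 1 and 3 are ignored, which is
  // also why only the even elements of the vXi32 inputs are demanded.
  std::array<uint32_t, 4> a = {0xFFFFFFFFu, 123u, 0xFFFFFFFFu, 456u};
  std::array<uint32_t, 4> b = {2u, 789u, 3u, 1011u};
  auto u = pmuludq128(a, b);
  auto s = pmuldq128(a, b);
  // Prints 8589934590 12884901885 (zero-extended) and -2 -3 (sign-extended).
  std::printf("pmuludq: %llu %llu\n",
              static_cast<unsigned long long>(u[0]),
              static_cast<unsigned long long>(u[1]));
  std::printf("pmuldq:  %lld %lld\n",
              static_cast<long long>(s[0]),
              static_cast<long long>(s[1]));
  return 0;
}

The zeroinitializer operands above are the degenerate instance: every product
is 0 * 0, so those calls could fold to a zero vector outright.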