[InstCombine][X86] Add tests showing missed opportunities to simplify PMULUDQ/PMULDQ...

author Simon Pilgrim <llvm-dev@redking.me.uk>

Sat, 24 Dec 2016 17:30:19 +0000 (17:30 +0000)

committer Simon Pilgrim <llvm-dev@redking.me.uk>

Sat, 24 Dec 2016 17:30:19 +0000 (17:30 +0000)
author Simon Pilgrim <llvm-dev@redking.me.uk>
Sat, 24 Dec 2016 17:30:19 +0000 (17:30 +0000)
committer Simon Pilgrim <llvm-dev@redking.me.uk>
Sat, 24 Dec 2016 17:30:19 +0000 (17:30 +0000)
diff --git a/test/Transforms/InstCombine/x86-muldq.ll b/test/Transforms/InstCombine/x86-muldq.ll

new file mode 100644 (file)

index 0000000..6c39552
--- /dev/null
+++ b/test/Transforms/InstCombine/x86-muldq.ll
@@ -0,0 +1,68 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+;
+; PMULUDQ/PMULDQ - only the even elements (0, 2, 4, 6) of the vXi32 inputs are required.
+;
+
+define <2 x i64> @test_demanded_elts_pmuludq_128(<4 x i32> %a0, <4 x i32> %a1) { ; 128-bit sse2.pmulu.dq case; the expected output below is the same four instructions as the input
+; CHECK-LABEL: @test_demanded_elts_pmuludq_128(
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> %a1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
+; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
+; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> undef, <2 x i32> zeroinitializer
+; CHECK-NEXT:    ret <2 x i64> [[TMP4]]
+;
+  %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2> ; copies %a0 elements 0 and 2 into the neighbouring odd lanes
+  %2 = shufflevector <4 x i32> %a1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3> ; lanes 0 and 2 now hold %a1 elements 1 and 3
+  %3 = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %1, <4 x i32> %2) ; per the section comment above, only the even i32 lanes of each operand are required
+  %4 = shufflevector <2 x i64> %3, <2 x i64> undef, <2 x i32> zeroinitializer ; all-zero mask, so both result lanes read element 0 of %3
+  ret <2 x i64> %4
+}
+
+define <4 x i64> @test_demanded_elts_pmuludq_256(<8 x i32> %a0, <8 x i32> %a1) { ; 256-bit avx2.pmulu.dq case; the expected output below is the same three instructions as the input
+; CHECK-LABEL: @test_demanded_elts_pmuludq_256(
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> %a1, <8 x i32> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
+; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> [[TMP1]], <8 x i32> [[TMP2]])
+; CHECK-NEXT:    ret <4 x i64> [[TMP3]]
+;
+  %1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6> ; copies %a0 elements 0, 2, 4, 6 into the neighbouring odd lanes
+  %2 = shufflevector <8 x i32> %a1, <8 x i32> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7> ; lanes 0, 2, 4, 6 now hold %a1 elements 1, 3, 5, 7
+  %3 = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> %1, <8 x i32> %2) ; per the section comment above, only the even i32 lanes of each operand are required
+  ret <4 x i64> %3
+}
+
+define <2 x i64> @test_demanded_elts_pmuldq_128(<4 x i32> %a0, <4 x i32> %a1) { ; 128-bit sse41.pmuldq case; the expected output below is the same three instructions as the input
+; CHECK-LABEL: @test_demanded_elts_pmuldq_128(
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> %a1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
+; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
+; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
+;
+  %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2> ; copies %a0 elements 0 and 2 into the neighbouring odd lanes
+  %2 = shufflevector <4 x i32> %a1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3> ; lanes 0 and 2 now hold %a1 elements 1 and 3
+  %3 = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %1, <4 x i32> %2) ; per the section comment above, only the even i32 lanes of each operand are required
+  ret <2 x i64> %3
+}
+
+define <4 x i64> @test_demanded_elts_pmuluq_256(<8 x i32> %a0, <8 x i32> %a1) { ; NOTE(review): name says pmuluq but the body calls avx2.pmul.dq; probably meant pmuldq_256 to match the sibling tests - confirm before renaming
+; CHECK-LABEL: @test_demanded_elts_pmuluq_256(
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> %a1, <8 x i32> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
+; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> [[TMP1]], <8 x i32> [[TMP2]])
+; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i64> [[TMP3]], <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
+; CHECK-NEXT:    ret <4 x i64> [[TMP4]]
+;
+  %1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6> ; copies %a0 elements 0, 2, 4, 6 into the neighbouring odd lanes
+  %2 = shufflevector <8 x i32> %a1, <8 x i32> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7> ; lanes 0, 2, 4, 6 now hold %a1 elements 1, 3, 5, 7
+  %3 = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> %1, <8 x i32> %2) ; per the section comment above, only the even i32 lanes of each operand are required
+  %4 = shufflevector <4 x i64> %3, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3> ; result reads only elements 0 and 3 of %3; elements 1 and 2 are unused
+  ret <4 x i64> %4
+}
+
+declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone ; called by @test_demanded_elts_pmuludq_128
+declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone ; called by @test_demanded_elts_pmuldq_128
+
+declare <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32>, <8 x i32>) nounwind readnone ; called by @test_demanded_elts_pmuludq_256
+declare <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32>, <8 x i32>) nounwind readnone ; called by @test_demanded_elts_pmuluq_256
author	Simon Pilgrim <llvm-dev@redking.me.uk>
	Sat, 24 Dec 2016 17:30:19 +0000 (17:30 +0000)
committer	Simon Pilgrim <llvm-dev@redking.me.uk>
	Sat, 24 Dec 2016 17:30:19 +0000 (17:30 +0000)