From: Craig Topper Date: Mon, 24 Dec 2018 19:40:17 +0000 (+0000) Subject: [X86] Add test cases for PR40142. NFC X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=7d9874e568fcff8da7d391b39a19a83056ca56cd;p=llvm [X86] Add test cases for PR40142. NFC git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@350058 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/test/CodeGen/X86/pmul.ll b/test/CodeGen/X86/pmul.ll index 2d098771550..72ddc673517 100644 --- a/test/CodeGen/X86/pmul.ll +++ b/test/CodeGen/X86/pmul.ll @@ -1369,3 +1369,76 @@ define <8 x i64> @mul_v8i64_sext(<8 x i16> %val1, <8 x i32> %val2) { %3 = mul <8 x i64> %1, %2 ret <8 x i64> %3 } + +define <2 x i64> @pmuldq_square(<2 x i64> %x) { +; SSE2-LABEL: pmuldq_square: +; SSE2: # %bb.0: +; SSE2-NEXT: psllq $32, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3] +; SSE2-NEXT: psrad $31, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,3,2,3] +; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; SSE2-NEXT: psrlq $32, %xmm0 +; SSE2-NEXT: pmuludq %xmm1, %xmm0 +; SSE2-NEXT: paddq %xmm0, %xmm0 +; SSE2-NEXT: psllq $32, %xmm0 +; SSE2-NEXT: pmuludq %xmm1, %xmm1 +; SSE2-NEXT: paddq %xmm1, %xmm0 +; SSE2-NEXT: retq +; +; SSE41-LABEL: pmuldq_square: +; SSE41: # %bb.0: +; SSE41-NEXT: movdqa %xmm0, %xmm1 +; SSE41-NEXT: psllq $32, %xmm1 +; SSE41-NEXT: psrad $31, %xmm1 +; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] +; SSE41-NEXT: pmuldq %xmm1, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: retq +; +; AVX2-LABEL: pmuldq_square: +; AVX2: # %bb.0: +; AVX2-NEXT: vpsllq $32, %xmm0, %xmm1 +; AVX2-NEXT: vpsrad $31, %xmm1, %xmm1 +; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] +; AVX2-NEXT: vpmuldq %xmm0, %xmm0, %xmm0 +; AVX2-NEXT: retq +; +; AVX512-LABEL: pmuldq_square: +; AVX512: # %bb.0: +; AVX512-NEXT: vpsllq $32, %xmm0, %xmm0 +; AVX512-NEXT: vpsraq $32, %zmm0, %zmm0 +; AVX512-NEXT: vpmuldq %xmm0, %xmm0, %xmm0 +; 
AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq + %1 = shl <2 x i64> %x, <i64 32, i64 32> + %2 = ashr exact <2 x i64> %1, <i64 32, i64 32> + %3 = mul nsw <2 x i64> %2, %2 + ret <2 x i64> %3 +} + +define <2 x i64> @pmuludq_square(<2 x i64> %x) { +; SSE2-LABEL: pmuludq_square: +; SSE2: # %bb.0: +; SSE2-NEXT: pand {{.*}}(%rip), %xmm0 +; SSE2-NEXT: pmuludq %xmm0, %xmm0 +; SSE2-NEXT: retq +; +; SSE41-LABEL: pmuludq_square: +; SSE41: # %bb.0: +; SSE41-NEXT: pxor %xmm1, %xmm1 +; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] +; SSE41-NEXT: pmuludq %xmm1, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: retq +; +; AVX-LABEL: pmuludq_square: +; AVX: # %bb.0: +; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] +; AVX-NEXT: vpmuludq %xmm0, %xmm0, %xmm0 +; AVX-NEXT: retq + %1 = and <2 x i64> %x, <i64 4294967295, i64 4294967295> + %2 = mul nuw <2 x i64> %1, %1 + ret <2 x i64> %2 +}