From 396a07242341c06b69608187f1336b196551aa01 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 3 Oct 2017 09:54:03 +0000 Subject: [PATCH] [X86][SSE] Add support for shuffle combining from PACKSS/PACKUS Mentioned in D38472 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@314777 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 4 ++++ test/CodeGen/X86/vector-mul.ll | 20 ++++---------------- 2 files changed, 8 insertions(+), 16 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 3dd4d74ca40..4ba5c2c8c8d 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -31918,6 +31918,10 @@ static SDValue combineVectorPack(SDNode *N, SelectionDAG &DAG, return getConstVector(Bits, Undefs, VT.getSimpleVT(), DAG, SDLoc(N)); } + // Attempt to combine as shuffle. + SDValue Op(N, 0); + combineX86ShufflesRecursively({Op}, 0, Op, {0}, {}, /*Depth*/ 1, + /*HasVarMask*/ false, DAG, DCI, Subtarget); return SDValue(); } diff --git a/test/CodeGen/X86/vector-mul.ll b/test/CodeGen/X86/vector-mul.ll index 19f87493d5c..95e64e9049d 100644 --- a/test/CodeGen/X86/vector-mul.ll +++ b/test/CodeGen/X86/vector-mul.ll @@ -358,13 +358,10 @@ define <16 x i8> @mul_v16i8_17(<16 x i8> %a0) nounwind { ; X64-XOP-NEXT: vpmovsxbw %xmm0, %xmm1 ; X64-XOP-NEXT: vmovdqa {{.*#+}} xmm2 = [17,17,17,17,17,17,17,17] ; X64-XOP-NEXT: vpmullw %xmm2, %xmm1, %xmm1 -; X64-XOP-NEXT: vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255] -; X64-XOP-NEXT: vpand %xmm3, %xmm1, %xmm1 ; X64-XOP-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] ; X64-XOP-NEXT: vpmovsxbw %xmm0, %xmm0 ; X64-XOP-NEXT: vpmullw %xmm2, %xmm0, %xmm0 -; X64-XOP-NEXT: vpand %xmm3, %xmm0, %xmm0 -; X64-XOP-NEXT: vpackuswb %xmm0, %xmm1, %xmm0 +; X64-XOP-NEXT: vpperm {{.*#+}} xmm0 = xmm1[0,2,4,6,8,10,12,14],xmm0[0,2,4,6,8,10,12,14] ; X64-XOP-NEXT: retq ; ; X64-AVX2-LABEL: mul_v16i8_17: @@ -493,13 +490,10 @@ define <16 x i8> @mul_v16i8_2_3_9_17_33_65_129_2_3_9_17_33_65_129_2_3(<16 x i8> ; X64-XOP: # BB#0: ; X64-XOP-NEXT: vpmovsxbw %xmm0, %xmm1 ; X64-XOP-NEXT: vpmullw {{.*}}(%rip), %xmm1, %xmm1 -; X64-XOP-NEXT: vmovdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] -; X64-XOP-NEXT: vpand %xmm2, %xmm1, %xmm1 ; X64-XOP-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] ; X64-XOP-NEXT: vpmovsxbw %xmm0, %xmm0 ; X64-XOP-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm0 -; X64-XOP-NEXT: vpand %xmm2, %xmm0, %xmm0 -; X64-XOP-NEXT: vpackuswb %xmm0, %xmm1, %xmm0 +; X64-XOP-NEXT: vpperm {{.*#+}} xmm0 = xmm1[0,2,4,6,8,10,12,14],xmm0[0,2,4,6,8,10,12,14] ; X64-XOP-NEXT: retq ; ; X64-AVX2-LABEL: mul_v16i8_2_3_9_17_33_65_129_2_3_9_17_33_65_129_2_3: @@ -637,13 +631,10 @@ define <16 x i8> @mul_v16i8_31(<16 x i8> %a0) nounwind { ; X64-XOP-NEXT: vpmovsxbw %xmm0, %xmm1 ; X64-XOP-NEXT: vmovdqa {{.*#+}} xmm2 = [31,31,31,31,31,31,31,31] ; X64-XOP-NEXT: vpmullw %xmm2, %xmm1, %xmm1 -; X64-XOP-NEXT: vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255] -; X64-XOP-NEXT: vpand %xmm3, %xmm1, %xmm1 ; X64-XOP-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] ; X64-XOP-NEXT: vpmovsxbw %xmm0, %xmm0 ; X64-XOP-NEXT: vpmullw %xmm2, %xmm0, %xmm0 -; X64-XOP-NEXT: vpand %xmm3, %xmm0, %xmm0 -; X64-XOP-NEXT: vpackuswb %xmm0, %xmm1, %xmm0 +; X64-XOP-NEXT: vpperm {{.*#+}} xmm0 = xmm1[0,2,4,6,8,10,12,14],xmm0[0,2,4,6,8,10,12,14] ; X64-XOP-NEXT: retq ; ; X64-AVX2-LABEL: mul_v16i8_31: @@ -1005,13 +996,10 @@ define <16 x i8> @mul_v16i8_0_1_3_7_15_31_63_127_0_1_3_7_15_31_63_127(<16 x i8> ; X64-XOP-NEXT: vpmovsxbw %xmm0, %xmm1 ; X64-XOP-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,3,7,15,31,63,127] ; X64-XOP-NEXT: vpmullw %xmm2, %xmm1, %xmm1 -; X64-XOP-NEXT: vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255] -; X64-XOP-NEXT: vpand %xmm3, %xmm1, %xmm1 ; X64-XOP-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] ; X64-XOP-NEXT: vpmovsxbw %xmm0, %xmm0 ; X64-XOP-NEXT: vpmullw %xmm2, %xmm0, %xmm0 -; X64-XOP-NEXT: vpand %xmm3, %xmm0, %xmm0 -; X64-XOP-NEXT: vpackuswb %xmm0, %xmm1, %xmm0 +; X64-XOP-NEXT: vpperm {{.*#+}} xmm0 = xmm1[0,2,4,6,8,10,12,14],xmm0[0,2,4,6,8,10,12,14] ; X64-XOP-NEXT: retq ; ; X64-AVX2-LABEL: mul_v16i8_0_1_3_7_15_31_63_127_0_1_3_7_15_31_63_127: -- 2.40.0