From 9334970101effb7636c9ee9b583abd0135b6e897 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 24 Jan 2017 11:21:57 +0000 Subject: [PATCH] [X86][SSE] Add support for constant folding vector logical shift by immediates git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@292915 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 24 +++++-- test/CodeGen/X86/vec_shift5.ll | 12 ++-- test/CodeGen/X86/vector-rotate-128.ll | 60 ++++++++--------- test/CodeGen/X86/vector-rotate-256.ll | 48 +++++++------- test/CodeGen/X86/vector-shift-ashr-128.ll | 12 ++-- test/CodeGen/X86/vector-shift-ashr-256.ll | 12 ++-- test/CodeGen/X86/vector-shift-ashr-512.ll | 5 +- test/CodeGen/X86/vector-shift-lshr-128.ll | 34 +++++----- test/CodeGen/X86/vector-shift-lshr-256.ll | 78 +++++++++++------------ test/CodeGen/X86/vector-shift-lshr-512.ll | 5 +- test/CodeGen/X86/vector-shift-shl-128.ll | 32 ++++------ test/CodeGen/X86/vector-shift-shl-256.ll | 72 ++++++++++----------- test/CodeGen/X86/vector-shift-shl-512.ll | 5 +- 13 files changed, 186 insertions(+), 213 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 2168022d8cb..834a04c61d3 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -30463,9 +30463,11 @@ static SDValue combineShift(SDNode* N, SelectionDAG &DAG, static SDValue combineVectorShift(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { - assert((X86ISD::VSHLI == N->getOpcode() || X86ISD::VSRLI == N->getOpcode()) && - "Unexpected opcode"); + unsigned Opcode = N->getOpcode(); + assert((X86ISD::VSHLI == Opcode || X86ISD::VSRLI == Opcode) && + "Unexpected shift opcode"); EVT VT = N->getValueType(0); + unsigned NumElts = VT.getVectorNumElements(); unsigned NumBitsPerElt = VT.getScalarSizeInBits(); // This fails for mask register (vXi1) shifts. @@ -30477,12 +30479,14 @@ static SDValue combineVectorShift(SDNode *N, SelectionDAG &DAG, if (ShiftVal.zextOrTrunc(8).uge(NumBitsPerElt)) return getZeroVector(VT.getSimpleVT(), Subtarget, DAG, SDLoc(N)); + SDValue N0 = N->getOperand(0); + // Shift N0 by zero -> N0. if (!ShiftVal) - return N->getOperand(0); + return N0; // Shift zero -> zero. - if (ISD::isBuildVectorAllZeros(N->getOperand(0).getNode())) + if (ISD::isBuildVectorAllZeros(N0.getNode())) return getZeroVector(VT.getSimpleVT(), Subtarget, DAG, SDLoc(N)); // We can decode 'whole byte' logical bit shifts as shuffles. @@ -30496,6 +30500,18 @@ static SDValue combineVectorShift(SDNode *N, SelectionDAG &DAG, return SDValue(); // This routine will use CombineTo to replace N. } + // Constant Folding. + SmallBitVector UndefElts; + SmallVector EltBits; + if (N->isOnlyUserOf(N0.getNode()) && + getTargetConstantBitsFromNode(N0, NumBitsPerElt, UndefElts, EltBits)) { + assert(EltBits.size() == NumElts && "Unexpected shift value type"); + for (APInt &Elt : EltBits) + Elt = X86ISD::VSHLI == Opcode ? 
Elt.shl(ShiftVal.getZExtValue()) + : Elt.lshr(ShiftVal.getZExtValue()); + return getConstVector(EltBits, UndefElts, VT.getSimpleVT(), DAG, SDLoc(N)); + } + return SDValue(); } diff --git a/test/CodeGen/X86/vec_shift5.ll b/test/CodeGen/X86/vec_shift5.ll index cba2b5d0504..c0226d0a4c0 100644 --- a/test/CodeGen/X86/vec_shift5.ll +++ b/test/CodeGen/X86/vec_shift5.ll @@ -93,8 +93,7 @@ define <4 x i32> @test6() { define <2 x i64> @test7() { ; X32-LABEL: test7: ; X32: # BB#0: -; X32-NEXT: movdqa {{.*#+}} xmm0 = [1,0,2,0] -; X32-NEXT: psllq $3, %xmm0 +; X32-NEXT: movaps {{.*#+}} xmm0 = [8,0,16,0] ; X32-NEXT: retl ; ; X64-LABEL: test7: @@ -108,8 +107,7 @@ define <2 x i64> @test7() { define <2 x i64> @test8() { ; X32-LABEL: test8: ; X32: # BB#0: -; X32-NEXT: movdqa {{.*#+}} xmm0 = [8,0,16,0] -; X32-NEXT: psrlq $3, %xmm0 +; X32-NEXT: movaps {{.*#+}} xmm0 = [1,0,2,0] ; X32-NEXT: retl ; ; X64-LABEL: test8: @@ -151,8 +149,7 @@ define <4 x i32> @test10() { define <2 x i64> @test11() { ; X32-LABEL: test11: ; X32: # BB#0: -; X32-NEXT: movdqa {{.*#+}} xmm0 = -; X32-NEXT: psrlq $3, %xmm0 +; X32-NEXT: movaps {{.*#+}} xmm0 = ; X32-NEXT: retl ; ; X64-LABEL: test11: @@ -222,8 +219,7 @@ define <4 x i32> @test15() { define <2 x i64> @test16() { ; X32-LABEL: test16: ; X32: # BB#0: -; X32-NEXT: movdqa {{.*#+}} xmm0 = -; X32-NEXT: psllq $3, %xmm0 +; X32-NEXT: movaps {{.*#+}} xmm0 = ; X32-NEXT: retl ; ; X64-LABEL: test16: diff --git a/test/CodeGen/X86/vector-rotate-128.ll b/test/CodeGen/X86/vector-rotate-128.ll index fbb67ebbf60..eda893fc942 100644 --- a/test/CodeGen/X86/vector-rotate-128.ll +++ b/test/CodeGen/X86/vector-rotate-128.ll @@ -1044,8 +1044,7 @@ define <8 x i16> @constant_rotate_v8i16(<8 x i16> %a) nounwind { define <16 x i8> @constant_rotate_v16i8(<16 x i8> %a) nounwind { ; SSE2-LABEL: constant_rotate_v16i8: ; SSE2: # BB#0: -; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [0,1,2,3,4,5,6,7,8,7,6,5,4,3,2,1] -; SSE2-NEXT: psllw $5, %xmm3 +; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [8192,24640,41088,57536,57600,41152,24704,8256] ; SSE2-NEXT: pxor %xmm2, %xmm2 ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: pcmpgtb %xmm3, %xmm1 @@ -1071,8 +1070,7 @@ define <16 x i8> @constant_rotate_v16i8(<16 x i8> %a) nounwind { ; SSE2-NEXT: pandn %xmm1, %xmm3 ; SSE2-NEXT: paddb %xmm1, %xmm1 ; SSE2-NEXT: pand %xmm4, %xmm1 -; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [8,7,6,5,4,3,2,1,0,1,2,3,4,5,6,7] -; SSE2-NEXT: psllw $5, %xmm4 +; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [57600,41152,24704,8256,8192,24640,41088,57536] ; SSE2-NEXT: pxor %xmm5, %xmm5 ; SSE2-NEXT: pcmpgtb %xmm4, %xmm5 ; SSE2-NEXT: movdqa %xmm5, %xmm6 @@ -1105,11 +1103,10 @@ define <16 x i8> @constant_rotate_v16i8(<16 x i8> %a) nounwind { ; SSE41-LABEL: constant_rotate_v16i8: ; SSE41: # BB#0: ; SSE41-NEXT: movdqa %xmm0, %xmm1 -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [0,1,2,3,4,5,6,7,8,7,6,5,4,3,2,1] -; SSE41-NEXT: psllw $5, %xmm0 ; SSE41-NEXT: movdqa %xmm1, %xmm3 ; SSE41-NEXT: psllw $4, %xmm3 ; SSE41-NEXT: pand {{.*}}(%rip), %xmm3 +; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [8192,24640,41088,57536,57600,41152,24704,8256] ; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: pblendvb %xmm3, %xmm2 ; SSE41-NEXT: movdqa %xmm2, %xmm3 @@ -1121,11 +1118,10 @@ define <16 x i8> @constant_rotate_v16i8(<16 x i8> %a) nounwind { ; SSE41-NEXT: paddb %xmm3, %xmm3 ; SSE41-NEXT: paddb %xmm0, %xmm0 ; SSE41-NEXT: pblendvb %xmm3, %xmm2 -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [8,7,6,5,4,3,2,1,0,1,2,3,4,5,6,7] -; SSE41-NEXT: psllw $5, %xmm0 ; SSE41-NEXT: movdqa %xmm1, %xmm3 ; SSE41-NEXT: psrlw $4, %xmm3 ; SSE41-NEXT: pand {{.*}}(%rip), 
%xmm3 +; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [57600,41152,24704,8256,8192,24640,41088,57536] ; SSE41-NEXT: pblendvb %xmm3, %xmm1 ; SSE41-NEXT: movdqa %xmm1, %xmm3 ; SSE41-NEXT: psrlw $2, %xmm3 @@ -1143,31 +1139,29 @@ define <16 x i8> @constant_rotate_v16i8(<16 x i8> %a) nounwind { ; ; AVX-LABEL: constant_rotate_v16i8: ; AVX: # BB#0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,8,7,6,5,4,3,2,1] -; AVX-NEXT: vpsllw $5, %xmm1, %xmm1 -; AVX-NEXT: vpsllw $4, %xmm0, %xmm2 -; AVX-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 -; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm2 -; AVX-NEXT: vpsllw $2, %xmm2, %xmm3 -; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3 -; AVX-NEXT: vpaddb %xmm1, %xmm1, %xmm1 -; AVX-NEXT: vpblendvb %xmm1, %xmm3, %xmm2, %xmm2 -; AVX-NEXT: vpaddb %xmm2, %xmm2, %xmm3 -; AVX-NEXT: vpaddb %xmm1, %xmm1, %xmm1 -; AVX-NEXT: vpblendvb %xmm1, %xmm3, %xmm2, %xmm1 -; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [8,7,6,5,4,3,2,1,0,1,2,3,4,5,6,7] -; AVX-NEXT: vpsllw $5, %xmm2, %xmm2 -; AVX-NEXT: vpsrlw $4, %xmm0, %xmm3 -; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3 -; AVX-NEXT: vpblendvb %xmm2, %xmm3, %xmm0, %xmm0 -; AVX-NEXT: vpsrlw $2, %xmm0, %xmm3 +; AVX-NEXT: vpsllw $4, %xmm0, %xmm1 +; AVX-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 +; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [8192,24640,41088,57536,57600,41152,24704,8256] +; AVX-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm1 +; AVX-NEXT: vpsllw $2, %xmm1, %xmm3 ; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3 ; AVX-NEXT: vpaddb %xmm2, %xmm2, %xmm2 -; AVX-NEXT: vpblendvb %xmm2, %xmm3, %xmm0, %xmm0 -; AVX-NEXT: vpsrlw $1, %xmm0, %xmm3 -; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3 +; AVX-NEXT: vpblendvb %xmm2, %xmm3, %xmm1, %xmm1 +; AVX-NEXT: vpaddb %xmm1, %xmm1, %xmm3 ; AVX-NEXT: vpaddb %xmm2, %xmm2, %xmm2 -; AVX-NEXT: vpblendvb %xmm2, %xmm3, %xmm0, %xmm0 +; AVX-NEXT: vpblendvb %xmm2, %xmm3, %xmm1, %xmm1 +; AVX-NEXT: vpsrlw $4, %xmm0, %xmm2 +; AVX-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 +; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [57600,41152,24704,8256,8192,24640,41088,57536] +; AVX-NEXT: vpblendvb %xmm3, %xmm2, %xmm0, %xmm0 +; AVX-NEXT: vpsrlw $2, %xmm0, %xmm2 +; AVX-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 +; AVX-NEXT: vpaddb %xmm3, %xmm3, %xmm3 +; AVX-NEXT: vpblendvb %xmm3, %xmm2, %xmm0, %xmm0 +; AVX-NEXT: vpsrlw $1, %xmm0, %xmm2 +; AVX-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 +; AVX-NEXT: vpaddb %xmm3, %xmm3, %xmm3 +; AVX-NEXT: vpblendvb %xmm3, %xmm2, %xmm0, %xmm0 ; AVX-NEXT: vpor %xmm0, %xmm1, %xmm0 ; AVX-NEXT: retq ; @@ -1182,8 +1176,7 @@ define <16 x i8> @constant_rotate_v16i8(<16 x i8> %a) nounwind { ; ; X32-SSE-LABEL: constant_rotate_v16i8: ; X32-SSE: # BB#0: -; X32-SSE-NEXT: movdqa {{.*#+}} xmm3 = [0,1,2,3,4,5,6,7,8,7,6,5,4,3,2,1] -; X32-SSE-NEXT: psllw $5, %xmm3 +; X32-SSE-NEXT: movdqa {{.*#+}} xmm3 = [8192,24640,41088,57536,57600,41152,24704,8256] ; X32-SSE-NEXT: pxor %xmm2, %xmm2 ; X32-SSE-NEXT: pxor %xmm1, %xmm1 ; X32-SSE-NEXT: pcmpgtb %xmm3, %xmm1 @@ -1209,8 +1202,7 @@ define <16 x i8> @constant_rotate_v16i8(<16 x i8> %a) nounwind { ; X32-SSE-NEXT: pandn %xmm1, %xmm3 ; X32-SSE-NEXT: paddb %xmm1, %xmm1 ; X32-SSE-NEXT: pand %xmm4, %xmm1 -; X32-SSE-NEXT: movdqa {{.*#+}} xmm4 = [8,7,6,5,4,3,2,1,0,1,2,3,4,5,6,7] -; X32-SSE-NEXT: psllw $5, %xmm4 +; X32-SSE-NEXT: movdqa {{.*#+}} xmm4 = [57600,41152,24704,8256,8192,24640,41088,57536] ; X32-SSE-NEXT: pxor %xmm5, %xmm5 ; X32-SSE-NEXT: pcmpgtb %xmm4, %xmm5 ; X32-SSE-NEXT: movdqa %xmm5, %xmm6 diff --git a/test/CodeGen/X86/vector-rotate-256.ll b/test/CodeGen/X86/vector-rotate-256.ll index af1755e1431..2e357660ee2 100644 --- 
a/test/CodeGen/X86/vector-rotate-256.ll +++ b/test/CodeGen/X86/vector-rotate-256.ll @@ -582,8 +582,7 @@ define <32 x i8> @constant_rotate_v32i8(<32 x i8> %a) nounwind { ; AVX1-NEXT: vpsllw $4, %xmm1, %xmm2 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm8 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] ; AVX1-NEXT: vpand %xmm8, %xmm2, %xmm2 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,2,3,4,5,6,7,8,7,6,5,4,3,2,1] -; AVX1-NEXT: vpsllw $5, %xmm4, %xmm4 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [8192,24640,41088,57536,57600,41152,24704,8256] ; AVX1-NEXT: vpblendvb %xmm4, %xmm2, %xmm1, %xmm2 ; AVX1-NEXT: vpsllw $2, %xmm2, %xmm5 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252] @@ -605,8 +604,7 @@ define <32 x i8> @constant_rotate_v32i8(<32 x i8> %a) nounwind { ; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm3 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm8 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm8, %xmm3, %xmm3 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [8,7,6,5,4,3,2,1,0,1,2,3,4,5,6,7] -; AVX1-NEXT: vpsllw $5, %xmm5, %xmm5 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [57600,41152,24704,8256,8192,24640,41088,57536] ; AVX1-NEXT: vpblendvb %xmm5, %xmm3, %xmm1, %xmm1 ; AVX1-NEXT: vpsrlw $2, %xmm1, %xmm3 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63] @@ -633,31 +631,29 @@ define <32 x i8> @constant_rotate_v32i8(<32 x i8> %a) nounwind { ; ; AVX2-LABEL: constant_rotate_v32i8: ; AVX2: # BB#0: -; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,8,7,6,5,4,3,2,1,0,1,2,3,4,5,6,7,8,7,6,5,4,3,2,1] -; AVX2-NEXT: vpsllw $5, %ymm1, %ymm1 -; AVX2-NEXT: vpsllw $4, %ymm0, %ymm2 -; AVX2-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 -; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm2 -; AVX2-NEXT: vpsllw $2, %ymm2, %ymm3 -; AVX2-NEXT: vpand {{.*}}(%rip), %ymm3, %ymm3 -; AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1 -; AVX2-NEXT: vpblendvb %ymm1, %ymm3, %ymm2, %ymm2 -; AVX2-NEXT: vpaddb %ymm2, %ymm2, %ymm3 -; AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1 -; AVX2-NEXT: vpblendvb %ymm1, %ymm3, %ymm2, %ymm1 -; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [8,7,6,5,4,3,2,1,0,1,2,3,4,5,6,7,8,7,6,5,4,3,2,1,0,1,2,3,4,5,6,7] -; AVX2-NEXT: vpsllw $5, %ymm2, %ymm2 -; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm3 -; AVX2-NEXT: vpand {{.*}}(%rip), %ymm3, %ymm3 -; AVX2-NEXT: vpblendvb %ymm2, %ymm3, %ymm0, %ymm0 -; AVX2-NEXT: vpsrlw $2, %ymm0, %ymm3 +; AVX2-NEXT: vpsllw $4, %ymm0, %ymm1 +; AVX2-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1 +; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [8192,24640,41088,57536,57600,41152,24704,8256,8192,24640,41088,57536,57600,41152,24704,8256] +; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm1 +; AVX2-NEXT: vpsllw $2, %ymm1, %ymm3 ; AVX2-NEXT: vpand {{.*}}(%rip), %ymm3, %ymm3 ; AVX2-NEXT: vpaddb %ymm2, %ymm2, %ymm2 -; AVX2-NEXT: vpblendvb %ymm2, %ymm3, %ymm0, %ymm0 -; AVX2-NEXT: vpsrlw $1, %ymm0, %ymm3 -; AVX2-NEXT: vpand {{.*}}(%rip), %ymm3, %ymm3 +; AVX2-NEXT: vpblendvb %ymm2, %ymm3, %ymm1, %ymm1 +; AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm3 ; AVX2-NEXT: vpaddb %ymm2, %ymm2, %ymm2 -; AVX2-NEXT: vpblendvb %ymm2, %ymm3, %ymm0, %ymm0 +; AVX2-NEXT: vpblendvb %ymm2, %ymm3, %ymm1, %ymm1 +; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm2 +; AVX2-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 +; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [57600,41152,24704,8256,8192,24640,41088,57536,57600,41152,24704,8256,8192,24640,41088,57536] +; AVX2-NEXT: vpblendvb %ymm3, %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vpsrlw $2, %ymm0, %ymm2 +; AVX2-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 +; AVX2-NEXT: vpaddb %ymm3, %ymm3, %ymm3 
+; AVX2-NEXT: vpblendvb %ymm3, %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vpsrlw $1, %ymm0, %ymm2 +; AVX2-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 +; AVX2-NEXT: vpaddb %ymm3, %ymm3, %ymm3 +; AVX2-NEXT: vpblendvb %ymm3, %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; diff --git a/test/CodeGen/X86/vector-shift-ashr-128.ll b/test/CodeGen/X86/vector-shift-ashr-128.ll index 77a25b311e4..acad1216978 100644 --- a/test/CodeGen/X86/vector-shift-ashr-128.ll +++ b/test/CodeGen/X86/vector-shift-ashr-128.ll @@ -1309,8 +1309,7 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind { ; SSE2-LABEL: constant_shift_v16i8: ; SSE2: # BB#0: ; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] -; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0] -; SSE2-NEXT: psllw $5, %xmm3 +; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [8192,24640,41088,57536,49376,32928,16480,32] ; SSE2-NEXT: punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm3[8],xmm4[9],xmm3[9],xmm4[10],xmm3[10],xmm4[11],xmm3[11],xmm4[12],xmm3[12],xmm4[13],xmm3[13],xmm4[14],xmm3[14],xmm4[15],xmm3[15] ; SSE2-NEXT: pxor %xmm2, %xmm2 ; SSE2-NEXT: pxor %xmm5, %xmm5 @@ -1368,8 +1367,7 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind { ; SSE41-LABEL: constant_shift_v16i8: ; SSE41: # BB#0: ; SSE41-NEXT: movdqa %xmm0, %xmm1 -; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0] -; SSE41-NEXT: psllw $5, %xmm3 +; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [8192,24640,41088,57536,49376,32928,16480,32] ; SSE41-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm3[8],xmm0[9],xmm3[9],xmm0[10],xmm3[10],xmm0[11],xmm3[11],xmm0[12],xmm3[12],xmm0[13],xmm3[13],xmm0[14],xmm3[14],xmm0[15],xmm3[15] ; SSE41-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15] ; SSE41-NEXT: movdqa %xmm2, %xmm4 @@ -1404,8 +1402,7 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind { ; ; AVX-LABEL: constant_shift_v16i8: ; AVX: # BB#0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0] -; AVX-NEXT: vpsllw $5, %xmm1, %xmm1 +; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [8192,24640,41088,57536,49376,32928,16480,32] ; AVX-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] ; AVX-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] ; AVX-NEXT: vpsraw $4, %xmm3, %xmm4 @@ -1455,8 +1452,7 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind { ; X32-SSE-LABEL: constant_shift_v16i8: ; X32-SSE: # BB#0: ; X32-SSE-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] -; X32-SSE-NEXT: movdqa {{.*#+}} xmm3 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0] -; X32-SSE-NEXT: psllw $5, %xmm3 +; X32-SSE-NEXT: movdqa {{.*#+}} xmm3 = [8192,24640,41088,57536,49376,32928,16480,32] ; X32-SSE-NEXT: punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm3[8],xmm4[9],xmm3[9],xmm4[10],xmm3[10],xmm4[11],xmm3[11],xmm4[12],xmm3[12],xmm4[13],xmm3[13],xmm4[14],xmm3[14],xmm4[15],xmm3[15] ; X32-SSE-NEXT: pxor %xmm2, %xmm2 ; X32-SSE-NEXT: pxor %xmm5, %xmm5 diff --git a/test/CodeGen/X86/vector-shift-ashr-256.ll b/test/CodeGen/X86/vector-shift-ashr-256.ll index 27ed4592943..c09e6b2bc8d 100644 
--- a/test/CodeGen/X86/vector-shift-ashr-256.ll +++ b/test/CodeGen/X86/vector-shift-ashr-256.ll @@ -988,8 +988,7 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) nounwind { define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind { ; AVX1-LABEL: constant_shift_v32i8: ; AVX1: # BB#0: -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0] -; AVX1-NEXT: vpsllw $5, %xmm1, %xmm1 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [8192,24640,41088,57536,49376,32928,16480,32] ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm4 = xmm0[8],xmm3[8],xmm0[9],xmm3[9],xmm0[10],xmm3[10],xmm0[11],xmm3[11],xmm0[12],xmm3[12],xmm0[13],xmm3[13],xmm0[14],xmm3[14],xmm0[15],xmm3[15] @@ -1036,8 +1035,7 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind { ; ; AVX2-LABEL: constant_shift_v32i8: ; AVX2: # BB#0: -; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0,0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0] -; AVX2-NEXT: vpsllw $5, %ymm1, %ymm1 +; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32] ; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] ; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] ; AVX2-NEXT: vpsraw $4, %ymm3, %ymm4 @@ -1085,8 +1083,7 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind { ; ; AVX512DQ-LABEL: constant_shift_v32i8: ; AVX512DQ: # BB#0: -; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0,0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0] -; AVX512DQ-NEXT: vpsllw $5, %ymm1, %ymm1 +; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm1 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32] ; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] ; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] ; AVX512DQ-NEXT: vpsraw $4, %ymm3, %ymm4 @@ -1121,8 +1118,7 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind { ; ; AVX512DQVL-LABEL: constant_shift_v32i8: ; AVX512DQVL: # BB#0: -; AVX512DQVL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0,0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0] -; AVX512DQVL-NEXT: vpsllw $5, %ymm1, %ymm1 +; AVX512DQVL-NEXT: vmovdqa {{.*#+}} ymm1 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32] ; AVX512DQVL-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] ; AVX512DQVL-NEXT: vpunpckhbw {{.*#+}} 
ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] ; AVX512DQVL-NEXT: vpsraw $4, %ymm3, %ymm4 diff --git a/test/CodeGen/X86/vector-shift-ashr-512.ll b/test/CodeGen/X86/vector-shift-ashr-512.ll index 6cc98b5f3ee..4d4b7f4e822 100644 --- a/test/CodeGen/X86/vector-shift-ashr-512.ll +++ b/test/CodeGen/X86/vector-shift-ashr-512.ll @@ -309,8 +309,7 @@ define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) nounwind { define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) nounwind { ; AVX512DQ-LABEL: constant_shift_v64i8: ; AVX512DQ: # BB#0: -; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0,0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0] -; AVX512DQ-NEXT: vpsllw $5, %ymm2, %ymm2 +; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm2 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32] ; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8],ymm2[8],ymm0[9],ymm2[9],ymm0[10],ymm2[10],ymm0[11],ymm2[11],ymm0[12],ymm2[12],ymm0[13],ymm2[13],ymm0[14],ymm2[14],ymm0[15],ymm2[15],ymm0[24],ymm2[24],ymm0[25],ymm2[25],ymm0[26],ymm2[26],ymm0[27],ymm2[27],ymm0[28],ymm2[28],ymm0[29],ymm2[29],ymm0[30],ymm2[30],ymm0[31],ymm2[31] ; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] ; AVX512DQ-NEXT: vpsraw $4, %ymm4, %ymm5 @@ -357,7 +356,7 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) nounwind { ; AVX512BW: # BB#0: ; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm1 = zmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63] ; AVX512BW-NEXT: vpsraw $4, %zmm1, %zmm2 -; AVX512BW-NEXT: vpsllw $5, {{.*}}(%rip), %zmm3 +; AVX512BW-NEXT: vmovdqu8 {{.*#+}} zmm3 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32] ; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm4 = zmm0[8],zmm3[8],zmm0[9],zmm3[9],zmm0[10],zmm3[10],zmm0[11],zmm3[11],zmm0[12],zmm3[12],zmm0[13],zmm3[13],zmm0[14],zmm3[14],zmm0[15],zmm3[15],zmm0[24],zmm3[24],zmm0[25],zmm3[25],zmm0[26],zmm3[26],zmm0[27],zmm3[27],zmm0[28],zmm3[28],zmm0[29],zmm3[29],zmm0[30],zmm3[30],zmm0[31],zmm3[31],zmm0[40],zmm3[40],zmm0[41],zmm3[41],zmm0[42],zmm3[42],zmm0[43],zmm3[43],zmm0[44],zmm3[44],zmm0[45],zmm3[45],zmm0[46],zmm3[46],zmm0[47],zmm3[47],zmm0[56],zmm3[56],zmm0[57],zmm3[57],zmm0[58],zmm3[58],zmm0[59],zmm3[59],zmm0[60],zmm3[60],zmm0[61],zmm3[61],zmm0[62],zmm3[62],zmm0[63],zmm3[63] ; AVX512BW-NEXT: vpmovb2m %zmm4, %k1 ; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm1 {%k1} diff --git a/test/CodeGen/X86/vector-shift-lshr-128.ll b/test/CodeGen/X86/vector-shift-lshr-128.ll index 9b8c0def455..23c8f8997f7 100644 --- a/test/CodeGen/X86/vector-shift-lshr-128.ll +++ b/test/CodeGen/X86/vector-shift-lshr-128.ll @@ -1069,8 +1069,7 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) nounwind { define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind { ; SSE2-LABEL: constant_shift_v16i8: ; SSE2: # BB#0: -; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0] -; SSE2-NEXT: psllw $5, %xmm2 +; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [8192,24640,41088,57536,49376,32928,16480,32] ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: pxor %xmm3, %xmm3 ; SSE2-NEXT: pcmpgtb %xmm2, %xmm3 @@ -1102,11 +1101,10 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind { ; SSE41-LABEL: 
constant_shift_v16i8: ; SSE41: # BB#0: ; SSE41-NEXT: movdqa %xmm0, %xmm1 -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0] -; SSE41-NEXT: psllw $5, %xmm0 ; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: psrlw $4, %xmm2 ; SSE41-NEXT: pand {{.*}}(%rip), %xmm2 +; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [8192,24640,41088,57536,49376,32928,16480,32] ; SSE41-NEXT: pblendvb %xmm2, %xmm1 ; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: psrlw $2, %xmm2 @@ -1123,19 +1121,18 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind { ; ; AVX-LABEL: constant_shift_v16i8: ; AVX: # BB#0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0] -; AVX-NEXT: vpsllw $5, %xmm1, %xmm1 -; AVX-NEXT: vpsrlw $4, %xmm0, %xmm2 -; AVX-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 -; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0 -; AVX-NEXT: vpsrlw $2, %xmm0, %xmm2 -; AVX-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 -; AVX-NEXT: vpaddb %xmm1, %xmm1, %xmm1 -; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0 -; AVX-NEXT: vpsrlw $1, %xmm0, %xmm2 -; AVX-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 -; AVX-NEXT: vpaddb %xmm1, %xmm1, %xmm1 -; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0 +; AVX-NEXT: vpsrlw $4, %xmm0, %xmm1 +; AVX-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 +; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [8192,24640,41088,57536,49376,32928,16480,32] +; AVX-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpsrlw $2, %xmm0, %xmm1 +; AVX-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 +; AVX-NEXT: vpaddb %xmm2, %xmm2, %xmm2 +; AVX-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpsrlw $1, %xmm0, %xmm1 +; AVX-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 +; AVX-NEXT: vpaddb %xmm2, %xmm2, %xmm2 +; AVX-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq ; ; XOP-LABEL: constant_shift_v16i8: @@ -1161,8 +1158,7 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind { ; ; X32-SSE-LABEL: constant_shift_v16i8: ; X32-SSE: # BB#0: -; X32-SSE-NEXT: movdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0] -; X32-SSE-NEXT: psllw $5, %xmm2 +; X32-SSE-NEXT: movdqa {{.*#+}} xmm2 = [8192,24640,41088,57536,49376,32928,16480,32] ; X32-SSE-NEXT: pxor %xmm1, %xmm1 ; X32-SSE-NEXT: pxor %xmm3, %xmm3 ; X32-SSE-NEXT: pcmpgtb %xmm2, %xmm3 diff --git a/test/CodeGen/X86/vector-shift-lshr-256.ll b/test/CodeGen/X86/vector-shift-lshr-256.ll index 58bb8f3e6ec..60575250d71 100644 --- a/test/CodeGen/X86/vector-shift-lshr-256.ll +++ b/test/CodeGen/X86/vector-shift-lshr-256.ll @@ -825,8 +825,7 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind { ; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm2 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm8 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm8, %xmm2, %xmm2 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0] -; AVX1-NEXT: vpsllw $5, %xmm4, %xmm4 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [8192,24640,41088,57536,49376,32928,16480,32] ; AVX1-NEXT: vpblendvb %xmm4, %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vpsrlw $2, %xmm1, %xmm2 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63] @@ -852,19 +851,18 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind { ; ; AVX2-LABEL: constant_shift_v32i8: ; AVX2: # BB#0: -; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0,0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0] -; AVX2-NEXT: vpsllw $5, %ymm1, %ymm1 -; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm2 -; AVX2-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 -; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpsrlw $2, %ymm0, %ymm2 
-; AVX2-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 -; AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1 -; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpsrlw $1, %ymm0, %ymm2 -; AVX2-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 -; AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1 -; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm1 +; AVX2-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1 +; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32] +; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpsrlw $2, %ymm0, %ymm1 +; AVX2-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1 +; AVX2-NEXT: vpaddb %ymm2, %ymm2, %ymm2 +; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpsrlw $1, %ymm0, %ymm1 +; AVX2-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1 +; AVX2-NEXT: vpaddb %ymm2, %ymm2, %ymm2 +; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; ; XOPAVX1-LABEL: constant_shift_v32i8: @@ -889,19 +887,18 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind { ; ; AVX512DQ-LABEL: constant_shift_v32i8: ; AVX512DQ: # BB#0: -; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0,0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0] -; AVX512DQ-NEXT: vpsllw $5, %ymm1, %ymm1 -; AVX512DQ-NEXT: vpsrlw $4, %ymm0, %ymm2 -; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 -; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 -; AVX512DQ-NEXT: vpsrlw $2, %ymm0, %ymm2 -; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 -; AVX512DQ-NEXT: vpaddb %ymm1, %ymm1, %ymm1 -; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 -; AVX512DQ-NEXT: vpsrlw $1, %ymm0, %ymm2 -; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 -; AVX512DQ-NEXT: vpaddb %ymm1, %ymm1, %ymm1 -; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 +; AVX512DQ-NEXT: vpsrlw $4, %ymm0, %ymm1 +; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1 +; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm2 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32] +; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 +; AVX512DQ-NEXT: vpsrlw $2, %ymm0, %ymm1 +; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1 +; AVX512DQ-NEXT: vpaddb %ymm2, %ymm2, %ymm2 +; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 +; AVX512DQ-NEXT: vpsrlw $1, %ymm0, %ymm1 +; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1 +; AVX512DQ-NEXT: vpaddb %ymm2, %ymm2, %ymm2 +; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 ; AVX512DQ-NEXT: retq ; ; AVX512BW-LABEL: constant_shift_v32i8: @@ -913,19 +910,18 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind { ; ; AVX512DQVL-LABEL: constant_shift_v32i8: ; AVX512DQVL: # BB#0: -; AVX512DQVL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0,0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0] -; AVX512DQVL-NEXT: vpsllw $5, %ymm1, %ymm1 -; AVX512DQVL-NEXT: vpsrlw $4, %ymm0, %ymm2 -; AVX512DQVL-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 -; AVX512DQVL-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 -; AVX512DQVL-NEXT: vpsrlw $2, %ymm0, %ymm2 -; AVX512DQVL-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 -; AVX512DQVL-NEXT: vpaddb %ymm1, %ymm1, %ymm1 -; AVX512DQVL-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 -; AVX512DQVL-NEXT: vpsrlw $1, %ymm0, %ymm2 -; AVX512DQVL-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 -; AVX512DQVL-NEXT: vpaddb %ymm1, %ymm1, %ymm1 -; AVX512DQVL-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 +; AVX512DQVL-NEXT: vpsrlw $4, %ymm0, %ymm1 +; AVX512DQVL-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1 +; AVX512DQVL-NEXT: vmovdqa {{.*#+}} 
ymm2 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32] +; AVX512DQVL-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 +; AVX512DQVL-NEXT: vpsrlw $2, %ymm0, %ymm1 +; AVX512DQVL-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1 +; AVX512DQVL-NEXT: vpaddb %ymm2, %ymm2, %ymm2 +; AVX512DQVL-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 +; AVX512DQVL-NEXT: vpsrlw $1, %ymm0, %ymm1 +; AVX512DQVL-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1 +; AVX512DQVL-NEXT: vpaddb %ymm2, %ymm2, %ymm2 +; AVX512DQVL-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 ; AVX512DQVL-NEXT: retq ; ; AVX512BWVL-LABEL: constant_shift_v32i8: diff --git a/test/CodeGen/X86/vector-shift-lshr-512.ll b/test/CodeGen/X86/vector-shift-lshr-512.ll index 905445f3016..c269f815951 100644 --- a/test/CodeGen/X86/vector-shift-lshr-512.ll +++ b/test/CodeGen/X86/vector-shift-lshr-512.ll @@ -244,8 +244,7 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) nounwind { ; AVX512DQ-NEXT: vpsrlw $4, %ymm0, %ymm2 ; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512DQ-NEXT: vpand %ymm3, %ymm2, %ymm2 -; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm4 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0,0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0] -; AVX512DQ-NEXT: vpsllw $5, %ymm4, %ymm4 +; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm4 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32] ; AVX512DQ-NEXT: vpblendvb %ymm4, %ymm2, %ymm0, %ymm0 ; AVX512DQ-NEXT: vpsrlw $2, %ymm0, %ymm2 ; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm5 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63] @@ -270,7 +269,7 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) nounwind { ; ; AVX512BW-LABEL: constant_shift_v64i8: ; AVX512BW: # BB#0: -; AVX512BW-NEXT: vpsllw $5, {{.*}}(%rip), %zmm1 +; AVX512BW-NEXT: vmovdqu8 {{.*#+}} zmm1 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32] ; AVX512BW-NEXT: vpmovb2m %zmm1, %k1 ; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm2 ; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2 diff --git a/test/CodeGen/X86/vector-shift-shl-128.ll b/test/CodeGen/X86/vector-shift-shl-128.ll index 32334420f8b..d6088906ef8 100644 --- a/test/CodeGen/X86/vector-shift-shl-128.ll +++ b/test/CodeGen/X86/vector-shift-shl-128.ll @@ -926,8 +926,7 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) nounwind { define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind { ; SSE2-LABEL: constant_shift_v16i8: ; SSE2: # BB#0: -; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0] -; SSE2-NEXT: psllw $5, %xmm2 +; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [8192,24640,41088,57536,49376,32928,16480,32] ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: pxor %xmm3, %xmm3 ; SSE2-NEXT: pcmpgtb %xmm2, %xmm3 @@ -958,11 +957,10 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind { ; SSE41-LABEL: constant_shift_v16i8: ; SSE41: # BB#0: ; SSE41-NEXT: movdqa %xmm0, %xmm1 -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0] -; SSE41-NEXT: psllw $5, %xmm0 ; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: psllw $4, %xmm2 ; SSE41-NEXT: pand {{.*}}(%rip), %xmm2 +; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [8192,24640,41088,57536,49376,32928,16480,32] ; SSE41-NEXT: pblendvb %xmm2, %xmm1 ; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: psllw $2, %xmm2 @@ -978,18 +976,17 @@ define <16 x i8> 
@constant_shift_v16i8(<16 x i8> %a) nounwind { ; ; AVX-LABEL: constant_shift_v16i8: ; AVX: # BB#0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0] -; AVX-NEXT: vpsllw $5, %xmm1, %xmm1 -; AVX-NEXT: vpsllw $4, %xmm0, %xmm2 -; AVX-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 -; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0 -; AVX-NEXT: vpsllw $2, %xmm0, %xmm2 -; AVX-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 -; AVX-NEXT: vpaddb %xmm1, %xmm1, %xmm1 -; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0 -; AVX-NEXT: vpaddb %xmm0, %xmm0, %xmm2 -; AVX-NEXT: vpaddb %xmm1, %xmm1, %xmm1 -; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0 +; AVX-NEXT: vpsllw $4, %xmm0, %xmm1 +; AVX-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 +; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [8192,24640,41088,57536,49376,32928,16480,32] +; AVX-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpsllw $2, %xmm0, %xmm1 +; AVX-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 +; AVX-NEXT: vpaddb %xmm2, %xmm2, %xmm2 +; AVX-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpaddb %xmm0, %xmm0, %xmm1 +; AVX-NEXT: vpaddb %xmm2, %xmm2, %xmm2 +; AVX-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq ; ; XOP-LABEL: constant_shift_v16i8: @@ -1013,8 +1010,7 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind { ; ; X32-SSE-LABEL: constant_shift_v16i8: ; X32-SSE: # BB#0: -; X32-SSE-NEXT: movdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0] -; X32-SSE-NEXT: psllw $5, %xmm2 +; X32-SSE-NEXT: movdqa {{.*#+}} xmm2 = [8192,24640,41088,57536,49376,32928,16480,32] ; X32-SSE-NEXT: pxor %xmm1, %xmm1 ; X32-SSE-NEXT: pxor %xmm3, %xmm3 ; X32-SSE-NEXT: pcmpgtb %xmm2, %xmm3 diff --git a/test/CodeGen/X86/vector-shift-shl-256.ll b/test/CodeGen/X86/vector-shift-shl-256.ll index 104fa089c74..7f534050b6a 100644 --- a/test/CodeGen/X86/vector-shift-shl-256.ll +++ b/test/CodeGen/X86/vector-shift-shl-256.ll @@ -730,8 +730,7 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind { ; AVX1-NEXT: vpsllw $4, %xmm1, %xmm2 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] ; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0] -; AVX1-NEXT: vpsllw $5, %xmm4, %xmm4 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [8192,24640,41088,57536,49376,32928,16480,32] ; AVX1-NEXT: vpblendvb %xmm4, %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vpsllw $2, %xmm1, %xmm2 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252] @@ -754,18 +753,17 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind { ; ; AVX2-LABEL: constant_shift_v32i8: ; AVX2: # BB#0: -; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0,0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0] -; AVX2-NEXT: vpsllw $5, %ymm1, %ymm1 -; AVX2-NEXT: vpsllw $4, %ymm0, %ymm2 -; AVX2-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 -; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpsllw $2, %ymm0, %ymm2 -; AVX2-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 -; AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1 -; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpaddb %ymm0, %ymm0, %ymm2 -; AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1 -; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vpsllw $4, %ymm0, %ymm1 +; AVX2-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1 +; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32] +; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpsllw $2, 
%ymm0, %ymm1 +; AVX2-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1 +; AVX2-NEXT: vpaddb %ymm2, %ymm2, %ymm2 +; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpaddb %ymm0, %ymm0, %ymm1 +; AVX2-NEXT: vpaddb %ymm2, %ymm2, %ymm2 +; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; ; XOPAVX1-LABEL: constant_shift_v32i8: @@ -788,18 +786,17 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind { ; ; AVX512DQ-LABEL: constant_shift_v32i8: ; AVX512DQ: # BB#0: -; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0,0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0] -; AVX512DQ-NEXT: vpsllw $5, %ymm1, %ymm1 -; AVX512DQ-NEXT: vpsllw $4, %ymm0, %ymm2 -; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 -; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 -; AVX512DQ-NEXT: vpsllw $2, %ymm0, %ymm2 -; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 -; AVX512DQ-NEXT: vpaddb %ymm1, %ymm1, %ymm1 -; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 -; AVX512DQ-NEXT: vpaddb %ymm0, %ymm0, %ymm2 -; AVX512DQ-NEXT: vpaddb %ymm1, %ymm1, %ymm1 -; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 +; AVX512DQ-NEXT: vpsllw $4, %ymm0, %ymm1 +; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1 +; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm2 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32] +; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 +; AVX512DQ-NEXT: vpsllw $2, %ymm0, %ymm1 +; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1 +; AVX512DQ-NEXT: vpaddb %ymm2, %ymm2, %ymm2 +; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 +; AVX512DQ-NEXT: vpaddb %ymm0, %ymm0, %ymm1 +; AVX512DQ-NEXT: vpaddb %ymm2, %ymm2, %ymm2 +; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 ; AVX512DQ-NEXT: retq ; ; AVX512BW-LABEL: constant_shift_v32i8: @@ -811,18 +808,17 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind { ; ; AVX512DQVL-LABEL: constant_shift_v32i8: ; AVX512DQVL: # BB#0: -; AVX512DQVL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0,0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0] -; AVX512DQVL-NEXT: vpsllw $5, %ymm1, %ymm1 -; AVX512DQVL-NEXT: vpsllw $4, %ymm0, %ymm2 -; AVX512DQVL-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 -; AVX512DQVL-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 -; AVX512DQVL-NEXT: vpsllw $2, %ymm0, %ymm2 -; AVX512DQVL-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 -; AVX512DQVL-NEXT: vpaddb %ymm1, %ymm1, %ymm1 -; AVX512DQVL-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 -; AVX512DQVL-NEXT: vpaddb %ymm0, %ymm0, %ymm2 -; AVX512DQVL-NEXT: vpaddb %ymm1, %ymm1, %ymm1 -; AVX512DQVL-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 +; AVX512DQVL-NEXT: vpsllw $4, %ymm0, %ymm1 +; AVX512DQVL-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1 +; AVX512DQVL-NEXT: vmovdqa {{.*#+}} ymm2 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32] +; AVX512DQVL-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 +; AVX512DQVL-NEXT: vpsllw $2, %ymm0, %ymm1 +; AVX512DQVL-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1 +; AVX512DQVL-NEXT: vpaddb %ymm2, %ymm2, %ymm2 +; AVX512DQVL-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 +; AVX512DQVL-NEXT: vpaddb %ymm0, %ymm0, %ymm1 +; AVX512DQVL-NEXT: vpaddb %ymm2, %ymm2, %ymm2 +; AVX512DQVL-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 ; AVX512DQVL-NEXT: retq ; ; AVX512BWVL-LABEL: constant_shift_v32i8: diff --git a/test/CodeGen/X86/vector-shift-shl-512.ll b/test/CodeGen/X86/vector-shift-shl-512.ll index 180d6f3a3b0..39f8fe2f05d 100644 --- a/test/CodeGen/X86/vector-shift-shl-512.ll +++ 
b/test/CodeGen/X86/vector-shift-shl-512.ll @@ -230,8 +230,7 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) nounwind { ; AVX512DQ-NEXT: vpsllw $4, %ymm0, %ymm2 ; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] ; AVX512DQ-NEXT: vpand %ymm3, %ymm2, %ymm2 -; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm4 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0,0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0] -; AVX512DQ-NEXT: vpsllw $5, %ymm4, %ymm4 +; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm4 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32] ; AVX512DQ-NEXT: vpblendvb %ymm4, %ymm2, %ymm0, %ymm0 ; AVX512DQ-NEXT: vpsllw $2, %ymm0, %ymm2 ; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm5 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252] @@ -253,7 +252,7 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) nounwind { ; ; AVX512BW-LABEL: constant_shift_v64i8: ; AVX512BW: # BB#0: -; AVX512BW-NEXT: vpsllw $5, {{.*}}(%rip), %zmm1 +; AVX512BW-NEXT: vmovdqu8 {{.*#+}} zmm1 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32] ; AVX512BW-NEXT: vpmovb2m %zmm1, %k1 ; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm2 ; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2 -- 2.40.0
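
Note for readers skimming the diff: the core of the change is the new "Constant Folding" block in combineVectorShift. When the shifted operand is a build vector whose per-element bits are known, the X86ISD::VSHLI/VSRLI node is replaced by a freshly built constant vector, which is why the tests above drop their psllw/psrlw/psllq instructions and keep only a constant-pool load. The following is an illustrative standalone sketch of that fold, not part of the patch: it models lanes as plain uint64_t values instead of LLVM's APInt/SDValue machinery, and the enum and helper names (ShiftOp, foldVectorShiftImm) are hypothetical.

// Illustrative only -- not LLVM code. A minimal sketch, assuming uint64_t
// lanes, of the element-wise fold performed by the new combine: logical
// shift each known-constant element by the immediate, keeping results
// within the element width.
#include <cstdint>
#include <iostream>
#include <vector>

enum class ShiftOp { VSHLI, VSRLI }; // logical shift left / right by immediate

// Fold a per-element logical shift of known constant lanes.
std::vector<uint64_t> foldVectorShiftImm(const std::vector<uint64_t> &EltBits,
                                         unsigned NumBitsPerElt,
                                         unsigned ShiftVal, ShiftOp Op) {
  std::vector<uint64_t> Result;
  Result.reserve(EltBits.size());
  const uint64_t EltMask =
      NumBitsPerElt == 64 ? ~0ULL : ((1ULL << NumBitsPerElt) - 1);
  for (uint64_t Elt : EltBits) {
    uint64_t Folded;
    if (ShiftVal >= NumBitsPerElt)
      Folded = 0; // out-of-range logical shifts yield zero
    else if (Op == ShiftOp::VSHLI)
      Folded = (Elt << ShiftVal) & EltMask; // keep the result in-lane
    else
      Folded = (Elt & EltMask) >> ShiftVal;
    Result.push_back(Folded);
  }
  return Result;
}

int main() {
  // Mirrors test7 in vec_shift5.ll: <2 x i64> <1, 2> shifted left by 3
  // folds to the constant <8, 16>, i.e. the [8,0,16,0] movaps in the
  // updated X32 CHECK lines.
  for (uint64_t V : foldVectorShiftImm({1, 2}, 64, 3, ShiftOp::VSHLI))
    std::cout << V << ' ';
  std::cout << '\n'; // prints: 8 16
}

Shift amounts of NumBitsPerElt or more fold to an all-zeros vector, matching the separate uge(NumBitsPerElt) early-out already present in the combine.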