From: Simon Pilgrim
Date: Sat, 25 May 2019 18:02:17 +0000 (+0000)
Subject: [X86] lowerBuildVectorToBitOp - support build_vector(shift()) -> shift(build_vector...
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=a9e06432b7e9e0e6bc9e6c2f8614ee672e2001b4;p=llvm

[X86] lowerBuildVectorToBitOp - support build_vector(shift()) -> shift(build_vector(),C)

Commonly occurs in sign-extension cases
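
For illustration (a hypothetical example, not one of the tests updated below;
the function name is made up): sign-extending each lane with the same scalar
shift produces a build_vector whose operands are all ISD::SRA nodes with a
uniform immediate, which this fold can now turn into one vector shift of the
build_vector:

  define <2 x i64> @sign_splat(i64 %x, i64 %y) {
    ; Each lane is an ashr by the same constant, so SelectionDAG sees
    ; build_vector(sra(%x,63), sra(%y,63)) and can now emit
    ; sra(build_vector(%x,%y), splat(63)) instead of two scalar shifts.
    %sx = ashr i64 %x, 63
    %sy = ashr i64 %y, 63
    %v0 = insertelement <2 x i64> undef, i64 %sx, i32 0
    %v1 = insertelement <2 x i64> %v0, i64 %sy, i32 1
    ret <2 x i64> %v1
  }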

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@361706 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index e124b7d6c07..170e3cf33ba 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -8746,9 +8746,15 @@ static SDValue lowerBuildVectorToBitOp(BuildVectorSDNode *Op,
       return SDValue();
 
   // TODO: We may be able to add support for other Ops (ADD/SUB + shifts).
+  bool IsShift = false;
   switch (Opcode) {
   default:
     return SDValue();
+  case ISD::SHL:
+  case ISD::SRL:
+  case ISD::SRA:
+    IsShift = true;
+    break;
   case ISD::AND:
   case ISD::XOR:
   case ISD::OR:
@@ -8769,10 +8775,24 @@ static SDValue lowerBuildVectorToBitOp(BuildVectorSDNode *Op,
     // We expect the canonicalized RHS operand to be the constant.
     if (!isa<ConstantSDNode>(RHS))
       return SDValue();
+
+    // Extend shift amounts.
+    if (RHS.getValueSizeInBits() != VT.getScalarSizeInBits()) {
+      if (!IsShift)
+        return SDValue();
+      RHS = DAG.getZExtOrTrunc(RHS, DL, VT.getScalarType());
+    }
+
     LHSElts.push_back(LHS);
     RHSElts.push_back(RHS);
   }
 
+  // Limit to shifts by uniform immediates.
+  // TODO: Only accept vXi8/vXi64 special cases?
+  // TODO: Permit non-uniform XOP/AVX2/MULLO cases?
+  if (IsShift && any_of(RHSElts, [&](SDValue V) { return RHSElts[0] != V; }))
+    return SDValue();
+
   SDValue LHS = DAG.getBuildVector(VT, DL, LHSElts);
   SDValue RHS = DAG.getBuildVector(VT, DL, RHSElts);
   return DAG.getNode(Opcode, DL, VT, LHS, RHS);
diff --git a/test/CodeGen/X86/rotate-extract-vector.ll b/test/CodeGen/X86/rotate-extract-vector.ll
index 6301f3bf747..4959de71172 100644
--- a/test/CodeGen/X86/rotate-extract-vector.ll
+++ b/test/CodeGen/X86/rotate-extract-vector.ll
@@ -86,13 +86,12 @@ define <2 x i64> @vrolq_extract_udiv(<2 x i64> %i) nounwind {
 ; X64-NEXT:    vpextrq $1, %xmm0, %rax
 ; X64-NEXT:    movabsq $-6148914691236517205, %rcx # imm = 0xAAAAAAAAAAAAAAAB
 ; X64-NEXT:    mulq %rcx
-; X64-NEXT:    shrq %rdx
 ; X64-NEXT:    vmovq %rdx, %xmm1
 ; X64-NEXT:    vmovq %xmm0, %rax
 ; X64-NEXT:    mulq %rcx
-; X64-NEXT:    shrq %rdx
 ; X64-NEXT:    vmovq %rdx, %xmm0
 ; X64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X64-NEXT:    vpsrlq $1, %xmm0, %xmm0
 ; X64-NEXT:    vprolq $57, %zmm0, %zmm0
 ; X64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; X64-NEXT:    vzeroupper
@@ -256,24 +255,22 @@ define <2 x i64> @no_extract_udiv(<2 x i64> %i) nounwind {
 ; X64-NEXT:    movabsq $-6148914691236517205, %rdi # imm = 0xAAAAAAAAAAAAAAAB
 ; X64-NEXT:    movq %rcx, %rax
 ; X64-NEXT:    mulq %rdi
-; X64-NEXT:    shrq %rdx
 ; X64-NEXT:    vmovq %rdx, %xmm1
 ; X64-NEXT:    vmovq %xmm0, %rsi
 ; X64-NEXT:    movq %rsi, %rax
 ; X64-NEXT:    mulq %rdi
-; X64-NEXT:    shrq %rdx
 ; X64-NEXT:    vmovq %rdx, %xmm0
 ; X64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X64-NEXT:    vpsrlq $1, %xmm0, %xmm0
 ; X64-NEXT:    movabsq $-6180857105216966645, %rdi # imm = 0xAA392F35DC17F00B
 ; X64-NEXT:    movq %rcx, %rax
 ; X64-NEXT:    mulq %rdi
-; X64-NEXT:    shrq $9, %rdx
 ; X64-NEXT:    vmovq %rdx, %xmm1
 ; X64-NEXT:    movq %rsi, %rax
 ; X64-NEXT:    mulq %rdi
-; X64-NEXT:    shrq $9, %rdx
 ; X64-NEXT:    vmovq %rdx, %xmm2
 ; X64-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; X64-NEXT:    vpsrlq $9, %xmm1, %xmm1
 ; X64-NEXT:    vpsllq $56, %xmm0, %xmm0
 ; X64-NEXT:    vpor %xmm1, %xmm0, %xmm0
 ; X64-NEXT:    retq
diff --git a/test/CodeGen/X86/vector-idiv-udiv-128.ll b/test/CodeGen/X86/vector-idiv-udiv-128.ll
index 62bcc54072b..e599ceea7c9 100644
--- a/test/CodeGen/X86/vector-idiv-udiv-128.ll
+++ b/test/CodeGen/X86/vector-idiv-udiv-128.ll
@@ -19,7 +19,6 @@ define <2 x i64> @test_div7_2i64(<2 x i64> %a) nounwind {
 ; SSE2-NEXT:    subq %rdx, %rcx
 ; SSE2-NEXT:    shrq %rcx
 ; SSE2-NEXT:    addq %rdx, %rcx
-; SSE2-NEXT:    shrq $2, %rcx
 ; SSE2-NEXT:    movq %rcx, %xmm1
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
 ; SSE2-NEXT:    movq %xmm0, %rcx
@@ -28,9 +27,9 @@ define <2 x i64> @test_div7_2i64(<2 x i64> %a) nounwind {
 ; SSE2-NEXT:    subq %rdx, %rcx
 ; SSE2-NEXT:    shrq %rcx
 ; SSE2-NEXT:    addq %rdx, %rcx
-; SSE2-NEXT:    shrq $2, %rcx
 ; SSE2-NEXT:    movq %rcx, %xmm0
 ; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE2-NEXT:    psrlq $2, %xmm1
 ; SSE2-NEXT:    movdqa %xmm1, %xmm0
 ; SSE2-NEXT:    retq
 ;
@@ -43,7 +42,6 @@ define <2 x i64> @test_div7_2i64(<2 x i64> %a) nounwind {
 ; SSE41-NEXT:    subq %rdx, %rcx
 ; SSE41-NEXT:    shrq %rcx
 ; SSE41-NEXT:    addq %rdx, %rcx
-; SSE41-NEXT:    shrq $2, %rcx
 ; SSE41-NEXT:    movq %rcx, %xmm1
 ; SSE41-NEXT:    movq %xmm0, %rcx
 ; SSE41-NEXT:    movq %rcx, %rax
@@ -51,9 +49,9 @@ define <2 x i64> @test_div7_2i64(<2 x i64> %a) nounwind {
 ; SSE41-NEXT:    subq %rdx, %rcx
 ; SSE41-NEXT:    shrq %rcx
 ; SSE41-NEXT:    addq %rdx, %rcx
-; SSE41-NEXT:    shrq $2, %rcx
 ; SSE41-NEXT:    movq %rcx, %xmm0
 ; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE41-NEXT:    psrlq $2, %xmm0
 ; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: test_div7_2i64:
@@ -65,7 +63,6 @@ define <2 x i64> @test_div7_2i64(<2 x i64> %a) nounwind {
 ; AVX-NEXT:    subq %rdx, %rcx
 ; AVX-NEXT:    shrq %rcx
 ; AVX-NEXT:    addq %rdx, %rcx
-; AVX-NEXT:    shrq $2, %rcx
 ; AVX-NEXT:    vmovq %rcx, %xmm1
 ; AVX-NEXT:    vmovq %xmm0, %rcx
 ; AVX-NEXT:    movq %rcx, %rax
@@ -73,9 +70,9 @@ define <2 x i64> @test_div7_2i64(<2 x i64> %a) nounwind {
 ; AVX-NEXT:    subq %rdx, %rcx
 ; AVX-NEXT:    shrq %rcx
 ; AVX-NEXT:    addq %rdx, %rcx
-; AVX-NEXT:    shrq $2, %rcx
 ; AVX-NEXT:    vmovq %rcx, %xmm0
 ; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT:    vpsrlq $2, %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %res = udiv <2 x i64> %a, <i64 7, i64 7>
   ret <2 x i64> %res
diff --git a/test/CodeGen/X86/vector-idiv-udiv-256.ll b/test/CodeGen/X86/vector-idiv-udiv-256.ll
index f40a07935f5..198c6de8b0e 100644
--- a/test/CodeGen/X86/vector-idiv-udiv-256.ll
+++ b/test/CodeGen/X86/vector-idiv-udiv-256.ll
@@ -10,32 +10,30 @@ define <4 x i64> @test_div7_4i64(<4 x i64> %a) nounwind {
 ; AVX1-LABEL: test_div7_4i64:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT:    vpextrq $1, %xmm1, %rcx
+; AVX1-NEXT:    vpextrq $1, %xmm0, %rcx
 ; AVX1-NEXT:    movabsq $2635249153387078803, %rsi # imm = 0x2492492492492493
 ; AVX1-NEXT:    movq %rcx, %rax
 ; AVX1-NEXT:    mulq %rsi
 ; AVX1-NEXT:    subq %rdx, %rcx
 ; AVX1-NEXT:    shrq %rcx
 ; AVX1-NEXT:    addq %rdx, %rcx
-; AVX1-NEXT:    shrq $2, %rcx
-; AVX1-NEXT:    vmovq %rcx, %xmm2
-; AVX1-NEXT:    vmovq %xmm1, %rcx
+; AVX1-NEXT:    vmovq %rcx, %xmm1
+; AVX1-NEXT:    vmovq %xmm0, %rcx
 ; AVX1-NEXT:    movq %rcx, %rax
 ; AVX1-NEXT:    mulq %rsi
 ; AVX1-NEXT:    subq %rdx, %rcx
 ; AVX1-NEXT:    shrq %rcx
 ; AVX1-NEXT:    addq %rdx, %rcx
-; AVX1-NEXT:    shrq $2, %rcx
-; AVX1-NEXT:    vmovq %rcx, %xmm1
-; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX1-NEXT:    vmovq %rcx, %xmm2
+; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; AVX1-NEXT:    vpsrlq $2, %xmm1, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT:    vpextrq $1, %xmm0, %rcx
 ; AVX1-NEXT:    movq %rcx, %rax
 ; AVX1-NEXT:    mulq %rsi
 ; AVX1-NEXT:    subq %rdx, %rcx
 ; AVX1-NEXT:    shrq %rcx
 ; AVX1-NEXT:    addq %rdx, %rcx
-; AVX1-NEXT:    shrq $2, %rcx
 ; AVX1-NEXT:    vmovq %rcx, %xmm2
 ; AVX1-NEXT:    vmovq %xmm0, %rcx
 ; AVX1-NEXT:    movq %rcx, %rax
@@ -43,10 +41,10 @@ define <4 x i64> @test_div7_4i64(<4 x i64> %a) nounwind {
 ; AVX1-NEXT:    subq %rdx, %rcx
 ; AVX1-NEXT:    shrq %rcx
 ; AVX1-NEXT:    addq %rdx, %rcx
-; AVX1-NEXT:    shrq $2, %rcx
 ; AVX1-NEXT:    vmovq %rcx, %xmm0
 ; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    vpsrlq $2, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: test_div7_4i64:
@@ -59,7 +57,6 @@ define <4 x i64> @test_div7_4i64(<4 x i64> %a) nounwind {
 ; AVX2-NEXT:    subq %rdx, %rcx
 ; AVX2-NEXT:    shrq %rcx
 ; AVX2-NEXT:    addq %rdx, %rcx
-; AVX2-NEXT:    shrq $2, %rcx
 ; AVX2-NEXT:    vmovq %rcx, %xmm2
 ; AVX2-NEXT:    vmovq %xmm1, %rcx
 ; AVX2-NEXT:    movq %rcx, %rax
@@ -67,7 +64,6 @@ define <4 x i64> @test_div7_4i64(<4 x i64> %a) nounwind {
 ; AVX2-NEXT:    subq %rdx, %rcx
 ; AVX2-NEXT:    shrq %rcx
 ; AVX2-NEXT:    addq %rdx, %rcx
-; AVX2-NEXT:    shrq $2, %rcx
 ; AVX2-NEXT:    vmovq %rcx, %xmm1
 ; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; AVX2-NEXT:    vpextrq $1, %xmm0, %rcx
@@ -76,7 +72,6 @@ define <4 x i64> @test_div7_4i64(<4 x i64> %a) nounwind {
 ; AVX2-NEXT:    subq %rdx, %rcx
 ; AVX2-NEXT:    shrq %rcx
 ; AVX2-NEXT:    addq %rdx, %rcx
-; AVX2-NEXT:    shrq $2, %rcx
 ; AVX2-NEXT:    vmovq %rcx, %xmm2
 ; AVX2-NEXT:    vmovq %xmm0, %rcx
 ; AVX2-NEXT:    movq %rcx, %rax
@@ -84,10 +79,10 @@ define <4 x i64> @test_div7_4i64(<4 x i64> %a) nounwind {
 ; AVX2-NEXT:    subq %rdx, %rcx
 ; AVX2-NEXT:    shrq %rcx
 ; AVX2-NEXT:    addq %rdx, %rcx
-; AVX2-NEXT:    shrq $2, %rcx
 ; AVX2-NEXT:    vmovq %rcx, %xmm0
 ; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpsrlq $2, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
   %res = udiv <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
   ret <4 x i64> %res
diff --git a/test/CodeGen/X86/vector-idiv-udiv-512.ll b/test/CodeGen/X86/vector-idiv-udiv-512.ll
index 18ecac073df..495d35a0c84 100644
--- a/test/CodeGen/X86/vector-idiv-udiv-512.ll
+++ b/test/CodeGen/X86/vector-idiv-udiv-512.ll
@@ -17,7 +17,6 @@ define <8 x i64> @test_div7_8i64(<8 x i64> %a) nounwind {
 ; AVX-NEXT:    subq %rdx, %rcx
 ; AVX-NEXT:    shrq %rcx
 ; AVX-NEXT:    addq %rdx, %rcx
-; AVX-NEXT:    shrq $2, %rcx
 ; AVX-NEXT:    vmovq %rcx, %xmm2
 ; AVX-NEXT:    vmovq %xmm1, %rcx
 ; AVX-NEXT:    movq %rcx, %rax
@@ -25,7 +24,6 @@ define <8 x i64> @test_div7_8i64(<8 x i64> %a) nounwind {
 ; AVX-NEXT:    subq %rdx, %rcx
 ; AVX-NEXT:    shrq %rcx
 ; AVX-NEXT:    addq %rdx, %rcx
-; AVX-NEXT:    shrq $2, %rcx
 ; AVX-NEXT:    vmovq %rcx, %xmm1
 ; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; AVX-NEXT:    vextracti32x4 $2, %zmm0, %xmm2
@@ -35,7 +33,6 @@ define <8 x i64> @test_div7_8i64(<8 x i64> %a) nounwind {
 ; AVX-NEXT:    subq %rdx, %rcx
 ; AVX-NEXT:    shrq %rcx
 ; AVX-NEXT:    addq %rdx, %rcx
-; AVX-NEXT:    shrq $2, %rcx
 ; AVX-NEXT:    vmovq %rcx, %xmm3
 ; AVX-NEXT:    vmovq %xmm2, %rcx
 ; AVX-NEXT:    movq %rcx, %rax
@@ -43,7 +40,6 @@ define <8 x i64> @test_div7_8i64(<8 x i64> %a) nounwind {
 ; AVX-NEXT:    subq %rdx, %rcx
 ; AVX-NEXT:    shrq %rcx
 ; AVX-NEXT:    addq %rdx, %rcx
-; AVX-NEXT:    shrq $2, %rcx
 ; AVX-NEXT:    vmovq %rcx, %xmm2
 ; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; AVX-NEXT:    vinserti128 $1, %xmm1, %ymm2, %ymm1
@@ -54,7 +50,6 @@ define <8 x i64> @test_div7_8i64(<8 x i64> %a) nounwind {
 ; AVX-NEXT:    subq %rdx, %rcx
 ; AVX-NEXT:    shrq %rcx
 ; AVX-NEXT:    addq %rdx, %rcx
-; AVX-NEXT:    shrq $2, %rcx
 ; AVX-NEXT:    vmovq %rcx, %xmm3
 ; AVX-NEXT:    vmovq %xmm2, %rcx
 ; AVX-NEXT:    movq %rcx, %rax
@@ -62,7 +57,6 @@ define <8 x i64> @test_div7_8i64(<8 x i64> %a) nounwind {
 ; AVX-NEXT:    subq %rdx, %rcx
 ; AVX-NEXT:    shrq %rcx
 ; AVX-NEXT:    addq %rdx, %rcx
-; AVX-NEXT:    shrq $2, %rcx
 ; AVX-NEXT:    vmovq %rcx, %xmm2
 ; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; AVX-NEXT:    vpextrq $1, %xmm0, %rcx
@@ -71,7 +65,6 @@ define <8 x i64> @test_div7_8i64(<8 x i64> %a) nounwind {
 ; AVX-NEXT:    subq %rdx, %rcx
 ; AVX-NEXT:    shrq %rcx
 ; AVX-NEXT:    addq %rdx, %rcx
-; AVX-NEXT:    shrq $2, %rcx
 ; AVX-NEXT:    vmovq %rcx, %xmm3
 ; AVX-NEXT:    vmovq %xmm0, %rcx
 ; AVX-NEXT:    movq %rcx, %rax
@@ -79,11 +72,11 @@ define <8 x i64> @test_div7_8i64(<8 x i64> %a) nounwind {
 ; AVX-NEXT:    subq %rdx, %rcx
 ; AVX-NEXT:    shrq %rcx
 ; AVX-NEXT:    addq %rdx, %rcx
-; AVX-NEXT:    shrq $2, %rcx
 ; AVX-NEXT:    vmovq %rcx, %xmm0
 ; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
 ; AVX-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
 ; AVX-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX-NEXT:    vpsrlq $2, %zmm0, %zmm0
 ; AVX-NEXT:    retq
   %res = udiv <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
   ret <8 x i64> %res
diff --git a/test/CodeGen/X86/vector-sext-widen.ll b/test/CodeGen/X86/vector-sext-widen.ll
index c22ffd186c4..c4a2286b8ea 100644
--- a/test/CodeGen/X86/vector-sext-widen.ll
+++ b/test/CodeGen/X86/vector-sext-widen.ll
@@ -1354,12 +1354,12 @@ define <2 x i64> @load_sext_2i1_to_2i64(<2 x i1> *%ptr) {
 ; SSE-NEXT:    movzbl (%rdi), %eax
 ; SSE-NEXT:    movq %rax, %rcx
 ; SSE-NEXT:    shlq $62, %rcx
-; SSE-NEXT:    sarq $63, %rcx
-; SSE-NEXT:    movq %rcx, %xmm1
+; SSE-NEXT:    movq %rcx, %xmm0
 ; SSE-NEXT:    shlq $63, %rax
-; SSE-NEXT:    sarq $63, %rax
-; SSE-NEXT:    movq %rax, %xmm0
-; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT:    movq %rax, %xmm1
+; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE-NEXT:    psrad $31, %xmm1
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
 ; SSE-NEXT:    retq
 ;
 ; AVX1-LABEL: load_sext_2i1_to_2i64:
@@ -1367,12 +1367,12 @@ define <2 x i64> @load_sext_2i1_to_2i64(<2 x i1> *%ptr) {
 ; AVX1-NEXT:    movzbl (%rdi), %eax
 ; AVX1-NEXT:    movq %rax, %rcx
 ; AVX1-NEXT:    shlq $62, %rcx
-; AVX1-NEXT:    sarq $63, %rcx
 ; AVX1-NEXT:    vmovq %rcx, %xmm0
 ; AVX1-NEXT:    shlq $63, %rax
-; AVX1-NEXT:    sarq $63, %rax
 ; AVX1-NEXT:    vmovq %rax, %xmm1
 ; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: load_sext_2i1_to_2i64:
@@ -1380,12 +1380,12 @@ define <2 x i64> @load_sext_2i1_to_2i64(<2 x i1> *%ptr) {
 ; AVX2-NEXT:    movzbl (%rdi), %eax
 ; AVX2-NEXT:    movq %rax, %rcx
 ; AVX2-NEXT:    shlq $62, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
 ; AVX2-NEXT:    vmovq %rcx, %xmm0
 ; AVX2-NEXT:    shlq $63, %rax
-; AVX2-NEXT:    sarq $63, %rax
 ; AVX2-NEXT:    vmovq %rax, %xmm1
 ; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: load_sext_2i1_to_2i64:
@@ -1402,14 +1402,13 @@ define <2 x i64> @load_sext_2i1_to_2i64(<2 x i1> *%ptr) {
 ; X32-SSE2-NEXT:    movzbl (%eax), %eax
 ; X32-SSE2-NEXT:    movl %eax, %ecx
 ; X32-SSE2-NEXT:    shll $30, %ecx
-; X32-SSE2-NEXT:    sarl $31, %ecx
 ; X32-SSE2-NEXT:    movd %ecx, %xmm0
 ; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,1,1]
 ; X32-SSE2-NEXT:    shll $31, %eax
-; X32-SSE2-NEXT:    sarl $31, %eax
 ; X32-SSE2-NEXT:    movd %eax, %xmm0
 ; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
 ; X32-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-SSE2-NEXT:    psrad $31, %xmm0
 ; X32-SSE2-NEXT:    retl
 ;
 ; X32-SSE41-LABEL: load_sext_2i1_to_2i64:
@@ -1418,13 +1417,12 @@ define <2 x i64> @load_sext_2i1_to_2i64(<2 x i1> *%ptr) {
 ; X32-SSE41-NEXT:    movzbl (%eax), %eax
 ; X32-SSE41-NEXT:    movl %eax, %ecx
 ; X32-SSE41-NEXT:    shll $31, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
 ; X32-SSE41-NEXT:    movd %ecx, %xmm0
 ; X32-SSE41-NEXT:    pinsrd $1, %ecx, %xmm0
 ; X32-SSE41-NEXT:    shll $30, %eax
-; X32-SSE41-NEXT:    sarl $31, %eax
 ; X32-SSE41-NEXT:    pinsrd $2, %eax, %xmm0
 ; X32-SSE41-NEXT:    pinsrd $3, %eax, %xmm0
+; X32-SSE41-NEXT:    psrad $31, %xmm0
 ; X32-SSE41-NEXT:    retl
 entry:
   %X = load <2 x i1>, <2 x i1>* %ptr
@@ -1612,22 +1610,19 @@ define <4 x i32> @load_sext_4i1_to_4i32(<4 x i1> *%ptr) {
 ; X32-SSE2-NEXT:    movl (%eax), %eax
 ; X32-SSE2-NEXT:    movl %eax, %ecx
 ; X32-SSE2-NEXT:    shll $28, %ecx
-; X32-SSE2-NEXT:    sarl $31, %ecx
 ; X32-SSE2-NEXT:    movd %ecx, %xmm0
 ; X32-SSE2-NEXT:    movl %eax, %ecx
 ; X32-SSE2-NEXT:    shll $29, %ecx
-; X32-SSE2-NEXT:    sarl $31, %ecx
 ; X32-SSE2-NEXT:    movd %ecx, %xmm1
 ; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
 ; X32-SSE2-NEXT:    movl %eax, %ecx
 ; X32-SSE2-NEXT:    shll $30, %ecx
-; X32-SSE2-NEXT:    sarl $31, %ecx
 ; X32-SSE2-NEXT:    movd %ecx, %xmm2
 ; X32-SSE2-NEXT:    shll $31, %eax
-; X32-SSE2-NEXT:    sarl $31, %eax
 ; X32-SSE2-NEXT:    movd %eax, %xmm0
 ; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
 ; X32-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-SSE2-NEXT:    psrad $31, %xmm0
 ; X32-SSE2-NEXT:    retl
 ;
 ; X32-SSE41-LABEL: load_sext_4i1_to_4i32:
@@ -1636,19 +1631,16 @@ define <4 x i32> @load_sext_4i1_to_4i32(<4 x i1> *%ptr) {
 ; X32-SSE41-NEXT:    movl (%eax), %eax
 ; X32-SSE41-NEXT:    movl %eax, %ecx
 ; X32-SSE41-NEXT:    shll $30, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
 ; X32-SSE41-NEXT:    movl %eax, %edx
 ; X32-SSE41-NEXT:    shll $31, %edx
-; X32-SSE41-NEXT:    sarl $31, %edx
 ; X32-SSE41-NEXT:    movd %edx, %xmm0
 ; X32-SSE41-NEXT:    pinsrd $1, %ecx, %xmm0
 ; X32-SSE41-NEXT:    movl %eax, %ecx
 ; X32-SSE41-NEXT:    shll $29, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
 ; X32-SSE41-NEXT:    pinsrd $2, %ecx, %xmm0
 ; X32-SSE41-NEXT:    shll $28, %eax
-; X32-SSE41-NEXT:    sarl $31, %eax
 ; X32-SSE41-NEXT:    pinsrd $3, %eax, %xmm0
+; X32-SSE41-NEXT:    psrad $31, %xmm0
 ; X32-SSE41-NEXT:    retl
 entry:
   %X = load <4 x i1>, <4 x i1>* %ptr
@@ -1808,22 +1800,20 @@ define <4 x i64> @load_sext_4i1_to_4i64(<4 x i1> *%ptr) {
 ; AVX2-NEXT:    movl (%rdi), %eax
 ; AVX2-NEXT:    movq %rax, %rcx
 ; AVX2-NEXT:    shlq $60, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
 ; AVX2-NEXT:    vmovq %rcx, %xmm0
 ; AVX2-NEXT:    movq %rax, %rcx
 ; AVX2-NEXT:    shlq $61, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
 ; AVX2-NEXT:    vmovq %rcx, %xmm1
 ; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
 ; AVX2-NEXT:    movq %rax, %rcx
 ; AVX2-NEXT:    shlq $62, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
 ; AVX2-NEXT:    vmovq %rcx, %xmm1
 ; AVX2-NEXT:    shlq $63, %rax
-; AVX2-NEXT:    sarq $63, %rax
 ; AVX2-NEXT:    vmovq %rax, %xmm2
 ; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
 ; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
+; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: load_sext_4i1_to_4i64:
@@ -5990,22 +5980,19 @@ define <4 x i32> @sext_4i17_to_4i32(<4 x i17>* %ptr) {
 ; X32-SSE2-NEXT:    movl 8(%eax), %eax
 ; X32-SSE2-NEXT:    shldl $13, %edx, %eax
 ; X32-SSE2-NEXT:    shll $15, %eax
-; X32-SSE2-NEXT:    sarl $15, %eax
 ; X32-SSE2-NEXT:    movd %eax, %xmm0
 ; X32-SSE2-NEXT:    movl %edx, %eax
 ; X32-SSE2-NEXT:    shll $13, %eax
-; X32-SSE2-NEXT:    sarl $15, %eax
 ; X32-SSE2-NEXT:    movd %eax, %xmm1
 ; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
 ; X32-SSE2-NEXT:    shldl $15, %ecx, %edx
 ; X32-SSE2-NEXT:    shll $15, %ecx
-; X32-SSE2-NEXT:    sarl $15, %ecx
 ; X32-SSE2-NEXT:    movd %ecx, %xmm0
 ; X32-SSE2-NEXT:    shll $15, %edx
-; X32-SSE2-NEXT:    sarl $15, %edx
 ; X32-SSE2-NEXT:    movd %edx, %xmm2
 ; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
 ; X32-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-SSE2-NEXT:    psrad $15, %xmm0
 ; X32-SSE2-NEXT:    retl
 ;
 ; X32-SSE41-LABEL: sext_4i17_to_4i32:
@@ -6021,17 +6008,14 @@ define <4 x i32> @sext_4i17_to_4i32(<4 x i17>* %ptr) {
 ; X32-SSE41-NEXT:    shldl $13, %edx, %eax
 ; X32-SSE41-NEXT:    shldl $15, %ecx, %edx
 ; X32-SSE41-NEXT:    shll $15, %edx
-; X32-SSE41-NEXT:    sarl $15, %edx
 ; X32-SSE41-NEXT:    shll $15, %ecx
-; X32-SSE41-NEXT:    sarl $15, %ecx
 ; X32-SSE41-NEXT:    movd %ecx, %xmm0
 ; X32-SSE41-NEXT:    pinsrd $1, %edx, %xmm0
 ; X32-SSE41-NEXT:    shll $13, %esi
-; X32-SSE41-NEXT:    sarl $15, %esi
 ; X32-SSE41-NEXT:    pinsrd $2, %esi, %xmm0
 ; X32-SSE41-NEXT:    shll $15, %eax
-; X32-SSE41-NEXT:    sarl $15, %eax
 ; X32-SSE41-NEXT:    pinsrd $3, %eax, %xmm0
+; X32-SSE41-NEXT:    psrad $15, %xmm0
 ; X32-SSE41-NEXT:    popl %esi
 ; X32-SSE41-NEXT:    .cfi_def_cfa_offset 4
 ; X32-SSE41-NEXT:    retl
diff --git a/test/CodeGen/X86/vector-sext.ll b/test/CodeGen/X86/vector-sext.ll
index 3a9dbaeb57a..50efdc10af6 100644
--- a/test/CodeGen/X86/vector-sext.ll
+++ b/test/CodeGen/X86/vector-sext.ll
@@ -1354,12 +1354,12 @@ define <2 x i64> @load_sext_2i1_to_2i64(<2 x i1> *%ptr) {
 ; SSE-NEXT:    movzbl (%rdi), %eax
 ; SSE-NEXT:    movq %rax, %rcx
 ; SSE-NEXT:    shlq $62, %rcx
-; SSE-NEXT:    sarq $63, %rcx
-; SSE-NEXT:    movq %rcx, %xmm1
+; SSE-NEXT:    movq %rcx, %xmm0
 ; SSE-NEXT:    shlq $63, %rax
-; SSE-NEXT:    sarq $63, %rax
-; SSE-NEXT:    movq %rax, %xmm0
-; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT:    movq %rax, %xmm1
+; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE-NEXT:    psrad $31, %xmm1
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
 ; SSE-NEXT:    retq
 ;
 ; AVX1-LABEL: load_sext_2i1_to_2i64:
@@ -1367,12 +1367,12 @@ define <2 x i64> @load_sext_2i1_to_2i64(<2 x i1> *%ptr) {
 ; AVX1-NEXT:    movzbl (%rdi), %eax
 ; AVX1-NEXT:    movq %rax, %rcx
 ; AVX1-NEXT:    shlq $62, %rcx
-; AVX1-NEXT:    sarq $63, %rcx
 ; AVX1-NEXT:    vmovq %rcx, %xmm0
 ; AVX1-NEXT:    shlq $63, %rax
-; AVX1-NEXT:    sarq $63, %rax
 ; AVX1-NEXT:    vmovq %rax, %xmm1
 ; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: load_sext_2i1_to_2i64:
@@ -1380,12 +1380,12 @@ define <2 x i64> @load_sext_2i1_to_2i64(<2 x i1> *%ptr) {
 ; AVX2-NEXT:    movzbl (%rdi), %eax
 ; AVX2-NEXT:    movq %rax, %rcx
 ; AVX2-NEXT:    shlq $62, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
 ; AVX2-NEXT:    vmovq %rcx, %xmm0
 ; AVX2-NEXT:    shlq $63, %rax
-; AVX2-NEXT:    sarq $63, %rax
 ; AVX2-NEXT:    vmovq %rax, %xmm1
 ; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: load_sext_2i1_to_2i64:
@@ -1402,14 +1402,13 @@ define <2 x i64> @load_sext_2i1_to_2i64(<2 x i1> *%ptr) {
 ; X32-SSE2-NEXT:    movzbl (%eax), %eax
 ; X32-SSE2-NEXT:    movl %eax, %ecx
 ; X32-SSE2-NEXT:    shll $30, %ecx
-; X32-SSE2-NEXT:    sarl $31, %ecx
 ; X32-SSE2-NEXT:    movd %ecx, %xmm0
 ; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,1,1]
 ; X32-SSE2-NEXT:    shll $31, %eax
-; X32-SSE2-NEXT:    sarl $31, %eax
 ; X32-SSE2-NEXT:    movd %eax, %xmm0
 ; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
 ; X32-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-SSE2-NEXT:    psrad $31, %xmm0
 ; X32-SSE2-NEXT:    retl
 ;
 ; X32-SSE41-LABEL: load_sext_2i1_to_2i64:
@@ -1418,13 +1417,12 @@ define <2 x i64> @load_sext_2i1_to_2i64(<2 x i1> *%ptr) {
 ; X32-SSE41-NEXT:    movzbl (%eax), %eax
 ; X32-SSE41-NEXT:    movl %eax, %ecx
 ; X32-SSE41-NEXT:    shll $31, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
 ; X32-SSE41-NEXT:    movd %ecx, %xmm0
 ; X32-SSE41-NEXT:    pinsrd $1, %ecx, %xmm0
 ; X32-SSE41-NEXT:    shll $30, %eax
-; X32-SSE41-NEXT:    sarl $31, %eax
 ; X32-SSE41-NEXT:    pinsrd $2, %eax, %xmm0
 ; X32-SSE41-NEXT:    pinsrd $3, %eax, %xmm0
+; X32-SSE41-NEXT:    psrad $31, %xmm0
 ; X32-SSE41-NEXT:    retl
 entry:
   %X = load <2 x i1>, <2 x i1>* %ptr
@@ -1612,22 +1610,19 @@ define <4 x i32> @load_sext_4i1_to_4i32(<4 x i1> *%ptr) {
 ; X32-SSE2-NEXT:    movl (%eax), %eax
 ; X32-SSE2-NEXT:    movl %eax, %ecx
 ; X32-SSE2-NEXT:    shll $28, %ecx
-; X32-SSE2-NEXT:    sarl $31, %ecx
 ; X32-SSE2-NEXT:    movd %ecx, %xmm0
 ; X32-SSE2-NEXT:    movl %eax, %ecx
 ; X32-SSE2-NEXT:    shll $29, %ecx
-; X32-SSE2-NEXT:    sarl $31, %ecx
 ; X32-SSE2-NEXT:    movd %ecx, %xmm1
 ; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
 ; X32-SSE2-NEXT:    movl %eax, %ecx
 ; X32-SSE2-NEXT:    shll $30, %ecx
-; X32-SSE2-NEXT:    sarl $31, %ecx
 ; X32-SSE2-NEXT:    movd %ecx, %xmm2
 ; X32-SSE2-NEXT:    shll $31, %eax
-; X32-SSE2-NEXT:    sarl $31, %eax
 ; X32-SSE2-NEXT:    movd %eax, %xmm0
 ; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
 ; X32-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-SSE2-NEXT:    psrad $31, %xmm0
 ; X32-SSE2-NEXT:    retl
 ;
 ; X32-SSE41-LABEL: load_sext_4i1_to_4i32:
@@ -1636,19 +1631,16 @@ define <4 x i32> @load_sext_4i1_to_4i32(<4 x i1> *%ptr) {
 ; X32-SSE41-NEXT:    movl (%eax), %eax
 ; X32-SSE41-NEXT:    movl %eax, %ecx
 ; X32-SSE41-NEXT:    shll $30, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
 ; X32-SSE41-NEXT:    movl %eax, %edx
 ; X32-SSE41-NEXT:    shll $31, %edx
-; X32-SSE41-NEXT:    sarl $31, %edx
 ; X32-SSE41-NEXT:    movd %edx, %xmm0
 ; X32-SSE41-NEXT:    pinsrd $1, %ecx, %xmm0
 ; X32-SSE41-NEXT:    movl %eax, %ecx
 ; X32-SSE41-NEXT:    shll $29, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
 ; X32-SSE41-NEXT:    pinsrd $2, %ecx, %xmm0
 ; X32-SSE41-NEXT:    shll $28, %eax
-; X32-SSE41-NEXT:    sarl $31, %eax
 ; X32-SSE41-NEXT:    pinsrd $3, %eax, %xmm0
+; X32-SSE41-NEXT:    psrad $31, %xmm0
 ; X32-SSE41-NEXT:    retl
 entry:
   %X = load <4 x i1>, <4 x i1>* %ptr
@@ -1808,22 +1800,20 @@ define <4 x i64> @load_sext_4i1_to_4i64(<4 x i1> *%ptr) {
 ; AVX2-NEXT:    movl (%rdi), %eax
 ; AVX2-NEXT:    movq %rax, %rcx
 ; AVX2-NEXT:    shlq $60, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
 ; AVX2-NEXT:    vmovq %rcx, %xmm0
 ; AVX2-NEXT:    movq %rax, %rcx
 ; AVX2-NEXT:    shlq $61, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
 ; AVX2-NEXT:    vmovq %rcx, %xmm1
 ; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
 ; AVX2-NEXT:    movq %rax, %rcx
 ; AVX2-NEXT:    shlq $62, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
 ; AVX2-NEXT:    vmovq %rcx, %xmm1
 ; AVX2-NEXT:    shlq $63, %rax
-; AVX2-NEXT:    sarq $63, %rax
 ; AVX2-NEXT:    vmovq %rax, %xmm2
 ; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
 ; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
+; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: load_sext_4i1_to_4i64:
@@ -6008,22 +5998,19 @@ define <4 x i32> @sext_4i17_to_4i32(<4 x i17>* %ptr) {
 ; X32-SSE2-NEXT:    movl 8(%eax), %eax
 ; X32-SSE2-NEXT:    shldl $13, %edx, %eax
 ; X32-SSE2-NEXT:    shll $15, %eax
-; X32-SSE2-NEXT:    sarl $15, %eax
 ; X32-SSE2-NEXT:    movd %eax, %xmm0
 ; X32-SSE2-NEXT:    movl %edx, %eax
 ; X32-SSE2-NEXT:    shll $13, %eax
-; X32-SSE2-NEXT:    sarl $15, %eax
 ; X32-SSE2-NEXT:    movd %eax, %xmm1
 ; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
 ; X32-SSE2-NEXT:    shldl $15, %ecx, %edx
 ; X32-SSE2-NEXT:    shll $15, %ecx
-; X32-SSE2-NEXT:    sarl $15, %ecx
 ; X32-SSE2-NEXT:    movd %ecx, %xmm0
 ; X32-SSE2-NEXT:    shll $15, %edx
-; X32-SSE2-NEXT:    sarl $15, %edx
 ; X32-SSE2-NEXT:    movd %edx, %xmm2
 ; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
 ; X32-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-SSE2-NEXT:    psrad $15, %xmm0
 ; X32-SSE2-NEXT:    retl
 ;
 ; X32-SSE41-LABEL: sext_4i17_to_4i32:
@@ -6039,17 +6026,14 @@ define <4 x i32> @sext_4i17_to_4i32(<4 x i17>* %ptr) {
 ; X32-SSE41-NEXT:    shldl $13, %edx, %eax
 ; X32-SSE41-NEXT:    shldl $15, %ecx, %edx
 ; X32-SSE41-NEXT:    shll $15, %edx
-; X32-SSE41-NEXT:    sarl $15, %edx
 ; X32-SSE41-NEXT:    shll $15, %ecx
-; X32-SSE41-NEXT:    sarl $15, %ecx
 ; X32-SSE41-NEXT:    movd %ecx, %xmm0
 ; X32-SSE41-NEXT:    pinsrd $1, %edx, %xmm0
 ; X32-SSE41-NEXT:    shll $13, %esi
-; X32-SSE41-NEXT:    sarl $15, %esi
 ; X32-SSE41-NEXT:    pinsrd $2, %esi, %xmm0
 ; X32-SSE41-NEXT:    shll $15, %eax
-; X32-SSE41-NEXT:    sarl $15, %eax
 ; X32-SSE41-NEXT:    pinsrd $3, %eax, %xmm0
+; X32-SSE41-NEXT:    psrad $15, %xmm0
 ; X32-SSE41-NEXT:    popl %esi
 ; X32-SSE41-NEXT:    .cfi_def_cfa_offset 4
 ; X32-SSE41-NEXT:    retl