From: Simon Pilgrim Date: Sat, 11 Feb 2017 21:55:24 +0000 (+0000) Subject: [X86][SSE] Improve VSEXT/VZEXT constant folding. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=796f9e5e578ecd0d4d399bf118dbfc17896a90b5;p=llvm [X86][SSE] Improve VSEXT/VZEXT constant folding. Generalize VSEXT/VZEXT constant folding to work with any target constant bits source, not just BUILD_VECTOR. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@294873 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index f43567573eb..474a8a60b9b 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -33996,30 +33996,37 @@ static SDValue combineSub(SDNode *N, SelectionDAG &DAG, static SDValue combineVSZext(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { + if (DCI.isBeforeLegalize()) + return SDValue(); + SDLoc DL(N); unsigned Opcode = N->getOpcode(); MVT VT = N->getSimpleValueType(0); MVT SVT = VT.getVectorElementType(); + unsigned NumElts = VT.getVectorNumElements(); + unsigned EltSizeInBits = SVT.getSizeInBits(); + SDValue Op = N->getOperand(0); MVT OpVT = Op.getSimpleValueType(); MVT OpEltVT = OpVT.getVectorElementType(); - unsigned InputBits = OpEltVT.getSizeInBits() * VT.getVectorNumElements(); + unsigned OpEltSizeInBits = OpEltVT.getSizeInBits(); + unsigned InputBits = OpEltSizeInBits * NumElts; // Perform any constant folding. // FIXME: Reduce constant pool usage and don't fold when OptSize is enabled. 
- if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) { - unsigned NumDstElts = VT.getVectorNumElements(); - SmallBitVector Undefs(NumDstElts, false); - SmallVector<APInt, 4> Vals(NumDstElts, APInt(SVT.getSizeInBits(), 0)); - for (unsigned i = 0; i != NumDstElts; ++i) { - SDValue OpElt = Op.getOperand(i); - if (OpElt.getOpcode() == ISD::UNDEF) { + SmallBitVector UndefElts; + SmallVector<APInt, 64> EltBits; + if (getTargetConstantBitsFromNode(Op, OpEltSizeInBits, UndefElts, EltBits)) { + SmallBitVector Undefs(NumElts, false); + SmallVector<APInt, 4> Vals(NumElts, APInt(EltSizeInBits, 0)); + bool IsZEXT = (Opcode == X86ISD::VZEXT); + for (unsigned i = 0; i != NumElts; ++i) { + if (UndefElts[i]) { Undefs[i] = true; continue; } - APInt Cst = cast<ConstantSDNode>(OpElt.getNode())->getAPIntValue(); - Vals[i] = Opcode == X86ISD::VZEXT ? Cst.zextOrTrunc(SVT.getSizeInBits()) - : Cst.sextOrTrunc(SVT.getSizeInBits()); + Vals[i] = IsZEXT ? EltBits[i].zextOrTrunc(EltSizeInBits) + : EltBits[i].sextOrTrunc(EltSizeInBits); } return getConstVector(Vals, Undefs, VT, DAG, DL); } diff --git a/test/CodeGen/X86/avx512-mask-op.ll b/test/CodeGen/X86/avx512-mask-op.ll index 6bd28b18b23..6d0c7c2be96 100644 --- a/test/CodeGen/X86/avx512-mask-op.ll +++ b/test/CodeGen/X86/avx512-mask-op.ll @@ -927,14 +927,8 @@ define void @store_i8_i1(i8 %x, i1 *%y) { define <32 x i16> @test_build_vec_v32i1(<32 x i16> %x) { ; KNL-LABEL: test_build_vec_v32i1: ; KNL: ## BB#0: -; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero -; KNL-NEXT: vpsllw $15, %ymm2, %ymm2 -; KNL-NEXT: vpsraw $15, %ymm2, %ymm2 -; KNL-NEXT: vpand %ymm0, %ymm2, %ymm0 -; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero 
-; KNL-NEXT: vpsllw $15, %ymm2, %ymm2 -; KNL-NEXT: vpsraw $15, %ymm2, %ymm2 -; KNL-NEXT: vpand %ymm1, %ymm2, %ymm1 +; KNL-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0 +; KNL-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1 ; KNL-NEXT: retq ; ; SKX-LABEL: test_build_vec_v32i1: diff --git a/test/CodeGen/X86/fold-vector-sext-zext.ll b/test/CodeGen/X86/fold-vector-sext-zext.ll index 3f502efa753..f6d1e2c60be 100644 --- a/test/CodeGen/X86/fold-vector-sext-zext.ll +++ b/test/CodeGen/X86/fold-vector-sext-zext.ll @@ -245,9 +245,8 @@ define <4 x i32> @test_zext_4i8_4i32() { define <4 x i64> @test_zext_4i8_4i64() { ; X32-LABEL: test_zext_4i8_4i64: ; X32: # BB#0: -; X32-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero -; X32-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero -; X32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; X32-NEXT: vmovaps {{.*#+}} xmm0 = [0,0,255,0] +; X32-NEXT: vinsertf128 $1, {{\.LCPI.*}}, %ymm0, %ymm0 ; X32-NEXT: retl ; ; X64-LABEL: test_zext_4i8_4i64: @@ -301,8 +300,7 @@ define <4 x i32> @test_zext_4i8_4i32_undef() { define <4 x i64> @test_zext_4i8_4i64_undef() { ; X32-LABEL: test_zext_4i8_4i64_undef: ; X32: # BB#0: -; X32-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; X32-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero +; X32-NEXT: vmovaps {{.*#+}} xmm0 = [255,0,255,0] ; X32-NEXT: movl $2, %eax ; X32-NEXT: vmovd %eax, %xmm1 ; X32-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero diff --git a/test/CodeGen/X86/vector-idiv-sdiv-128.ll b/test/CodeGen/X86/vector-idiv-sdiv-128.ll index 933359fa084..895bf5c0f02 100644 --- a/test/CodeGen/X86/vector-idiv-sdiv-128.ll +++ b/test/CodeGen/X86/vector-idiv-sdiv-128.ll @@ -203,7 +203,7 @@ define <16 x i8> @test_div7_16i8(<16 x i8> %a) nounwind { ; SSE41-LABEL: 
test_div7_16i8: ; SSE41: # BB#0: ; SSE41-NEXT: pmovsxbw %xmm0, %xmm1 -; SSE41-NEXT: pmovsxbw {{.*}}(%rip), %xmm2 +; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [65427,65427,65427,65427,65427,65427,65427,65427] ; SSE41-NEXT: pmullw %xmm2, %xmm1 ; SSE41-NEXT: psrlw $8, %xmm1 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1] @@ -227,7 +227,7 @@ define <16 x i8> @test_div7_16i8(<16 x i8> %a) nounwind { ; AVX1-LABEL: test_div7_16i8: ; AVX1: # BB#0: ; AVX1-NEXT: vpmovsxbw %xmm0, %xmm1 -; AVX1-NEXT: vpmovsxbw {{.*}}(%rip), %xmm2 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [65427,65427,65427,65427,65427,65427,65427,65427] ; AVX1-NEXT: vpmullw %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm1 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[2,3,0,1] @@ -249,8 +249,7 @@ define <16 x i8> @test_div7_16i8(<16 x i8> %a) nounwind { ; AVX2-LABEL: test_div7_16i8: ; AVX2: # BB#0: ; AVX2-NEXT: vpmovsxbw %xmm0, %ymm1 -; AVX2-NEXT: vpmovsxbw {{.*}}(%rip), %ymm2 -; AVX2-NEXT: vpmullw %ymm2, %ymm1, %ymm1 +; AVX2-NEXT: vpmullw {{.*}}(%rip), %ymm1, %ymm1 ; AVX2-NEXT: vpsrlw $8, %ymm1, %ymm1 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 ; AVX2-NEXT: vpackuswb %xmm2, %xmm1, %xmm1 @@ -522,7 +521,7 @@ define <16 x i8> @test_rem7_16i8(<16 x i8> %a) nounwind { ; SSE41-LABEL: test_rem7_16i8: ; SSE41: # BB#0: ; SSE41-NEXT: pmovsxbw %xmm0, %xmm1 -; SSE41-NEXT: pmovsxbw {{.*}}(%rip), %xmm2 +; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [65427,65427,65427,65427,65427,65427,65427,65427] ; SSE41-NEXT: pmullw %xmm2, %xmm1 ; SSE41-NEXT: psrlw $8, %xmm1 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1] @@ -556,7 +555,7 @@ define <16 x i8> @test_rem7_16i8(<16 x i8> %a) nounwind { ; AVX1-LABEL: test_rem7_16i8: ; AVX1: # BB#0: ; AVX1-NEXT: vpmovsxbw %xmm0, %xmm1 -; AVX1-NEXT: vpmovsxbw {{.*}}(%rip), %xmm2 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [65427,65427,65427,65427,65427,65427,65427,65427] ; AVX1-NEXT: vpmullw %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm1 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[2,3,0,1] @@ -589,8 
+588,7 @@ define <16 x i8> @test_rem7_16i8(<16 x i8> %a) nounwind { ; AVX2-LABEL: test_rem7_16i8: ; AVX2: # BB#0: ; AVX2-NEXT: vpmovsxbw %xmm0, %ymm1 -; AVX2-NEXT: vpmovsxbw {{.*}}(%rip), %ymm2 -; AVX2-NEXT: vpmullw %ymm2, %ymm1, %ymm1 +; AVX2-NEXT: vpmullw {{.*}}(%rip), %ymm1, %ymm1 ; AVX2-NEXT: vpsrlw $8, %ymm1, %ymm1 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 ; AVX2-NEXT: vpackuswb %xmm2, %xmm1, %xmm1 diff --git a/test/CodeGen/X86/vector-idiv-sdiv-256.ll b/test/CodeGen/X86/vector-idiv-sdiv-256.ll index 8cdab462806..e7bfe377821 100644 --- a/test/CodeGen/X86/vector-idiv-sdiv-256.ll +++ b/test/CodeGen/X86/vector-idiv-sdiv-256.ll @@ -163,7 +163,7 @@ define <32 x i8> @test_div7_32i8(<32 x i8> %a) nounwind { ; AVX1: # BB#0: ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX1-NEXT: vpmovsxbw %xmm1, %xmm2 -; AVX1-NEXT: vpmovsxbw {{.*}}(%rip), %xmm3 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [65427,65427,65427,65427,65427,65427,65427,65427] ; AVX1-NEXT: vpmullw %xmm3, %xmm2, %xmm2 ; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2 ; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm1[2,3,0,1] @@ -439,7 +439,7 @@ define <32 x i8> @test_rem7_32i8(<32 x i8> %a) nounwind { ; AVX1: # BB#0: ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 ; AVX1-NEXT: vpmovsxbw %xmm2, %xmm3 -; AVX1-NEXT: vpmovsxbw {{.*}}(%rip), %xmm1 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [65427,65427,65427,65427,65427,65427,65427,65427] ; AVX1-NEXT: vpmullw %xmm1, %xmm3, %xmm3 ; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3 ; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm2[2,3,0,1] diff --git a/test/CodeGen/X86/vector-idiv-udiv-128.ll b/test/CodeGen/X86/vector-idiv-udiv-128.ll index 7857e585dca..87f2630346a 100644 --- a/test/CodeGen/X86/vector-idiv-udiv-128.ll +++ b/test/CodeGen/X86/vector-idiv-udiv-128.ll @@ -195,7 +195,7 @@ define <16 x i8> @test_div7_16i8(<16 x i8> %a) nounwind { ; SSE41-LABEL: test_div7_16i8: ; SSE41: # BB#0: ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = 
xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero +; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [37,37,37,37,37,37,37,37] ; SSE41-NEXT: pmullw %xmm2, %xmm1 ; SSE41-NEXT: psrlw $8, %xmm1 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1] @@ -214,7 +214,7 @@ define <16 x i8> @test_div7_16i8(<16 x i8> %a) nounwind { ; AVX1-LABEL: test_div7_16i8: ; AVX1: # BB#0: ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [37,37,37,37,37,37,37,37] ; AVX1-NEXT: vpmullw %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm1 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[2,3,0,1] @@ -233,8 +233,7 @@ define <16 x i8> @test_div7_16i8(<16 x i8> %a) nounwind { ; AVX2-LABEL: test_div7_16i8: ; AVX2: # BB#0: ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero -; AVX2-NEXT: vpmullw %ymm2, %ymm1, %ymm1 +; AVX2-NEXT: vpmullw {{.*}}(%rip), %ymm1, %ymm1 ; AVX2-NEXT: vpsrlw $8, %ymm1, %ymm1 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 ; AVX2-NEXT: vpackuswb %xmm2, %xmm1, %xmm1 @@ -501,7 +500,7 @@ define <16 x i8> @test_rem7_16i8(<16 x i8> %a) nounwind { ; SSE41-LABEL: test_rem7_16i8: ; SSE41: # BB#0: ; SSE41-NEXT: pmovzxbw 
{{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero +; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [37,37,37,37,37,37,37,37] ; SSE41-NEXT: pmullw %xmm2, %xmm1 ; SSE41-NEXT: psrlw $8, %xmm1 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1] @@ -532,7 +531,7 @@ define <16 x i8> @test_rem7_16i8(<16 x i8> %a) nounwind { ; AVX1-LABEL: test_rem7_16i8: ; AVX1: # BB#0: ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [37,37,37,37,37,37,37,37] ; AVX1-NEXT: vpmullw %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm1 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[2,3,0,1] @@ -562,8 +561,7 @@ define <16 x i8> @test_rem7_16i8(<16 x i8> %a) nounwind { ; AVX2-LABEL: test_rem7_16i8: ; AVX2: # BB#0: ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero -; AVX2-NEXT: vpmullw %ymm2, %ymm1, %ymm1 +; AVX2-NEXT: vpmullw {{.*}}(%rip), %ymm1, %ymm1 ; AVX2-NEXT: vpsrlw $8, %ymm1, %ymm1 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 ; AVX2-NEXT: vpackuswb %xmm2, %xmm1, %xmm1 diff --git a/test/CodeGen/X86/vector-idiv-udiv-256.ll b/test/CodeGen/X86/vector-idiv-udiv-256.ll index 7a2f99730e2..4adc2e2fb6c 100644 --- 
a/test/CodeGen/X86/vector-idiv-udiv-256.ll +++ b/test/CodeGen/X86/vector-idiv-udiv-256.ll @@ -174,7 +174,7 @@ define <32 x i8> @test_div7_32i8(<32 x i8> %a) nounwind { ; AVX1: # BB#0: ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero -; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm3 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero +; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [37,37,37,37,37,37,37,37] ; AVX1-NEXT: vpmullw %xmm3, %xmm2, %xmm2 ; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2 ; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm1[2,3,0,1] @@ -453,7 +453,7 @@ define <32 x i8> @test_rem7_32i8(<32 x i8> %a) nounwind { ; AVX1: # BB#0: ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero -; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [37,37,37,37,37,37,37,37] ; AVX1-NEXT: vpmullw %xmm1, %xmm3, %xmm3 ; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3 ; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm2[2,3,0,1] diff --git a/test/CodeGen/X86/widen_load-2.ll b/test/CodeGen/X86/widen_load-2.ll index 61297cc11d3..f2d205bb44e 100644 --- a/test/CodeGen/X86/widen_load-2.ll +++ b/test/CodeGen/X86/widen_load-2.ll @@ -372,10 +372,10 @@ define void @rot(%i8vec3pack* nocapture sret %result, %i8vec3pack* %X, %i8vec3pa ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero +; X86-NEXT: movdqa {{.*#+}} xmm0 = [40606,0,158,0] ; X86-NEXT: pextrw $0, %xmm0, (%edx) ; X86-NEXT: movb $-98, 2(%edx) -; X86-NEXT: pmovzxwq {{.*#+}} xmm0 = 
mem[0],zero,zero,zero,mem[1],zero,zero,zero +; X86-NEXT: movdqa {{.*#+}} xmm0 = [257,0,1,0] ; X86-NEXT: pextrw $0, %xmm0, (%ecx) ; X86-NEXT: movb $1, 2(%ecx) ; X86-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero @@ -391,10 +391,10 @@ define void @rot(%i8vec3pack* nocapture sret %result, %i8vec3pack* %X, %i8vec3pa ; ; X64-LABEL: rot: ; X64: # BB#0: # %entry -; X64-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero +; X64-NEXT: movdqa {{.*#+}} xmm0 = [40606,158] ; X64-NEXT: pextrw $0, %xmm0, (%rsi) ; X64-NEXT: movb $-98, 2(%rsi) -; X64-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero +; X64-NEXT: movdqa {{.*#+}} xmm0 = [257,1] ; X64-NEXT: pextrw $0, %xmm0, (%rdx) ; X64-NEXT: movb $1, 2(%rdx) ; X64-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero