From: Craig Topper
Date: Sat, 11 Feb 2017 05:32:57 +0000 (+0000)
Subject: [X86] Don't base domain decisions on VEXTRACTF128/VINSERTF128 if only AVX1 is available.
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=ff0f1ca865f01ca11a2cbff1392808994450a50b;p=llvm

[X86] Don't base domain decisions on VEXTRACTF128/VINSERTF128 if only AVX1 is available.

It seems the execution dependency pass likes to use FP instructions when a
vextractf128 instruction produced the register, even when most of the consuming
code is integer. Without AVX2 we don't have the corresponding integer
instruction available.

This patch forces the domain on these instructions to GenericDomain when AVX2
is not supported, so that they are ignored by domain fixing. If AVX2 is
supported we report the correct domain and allow them to switch between integer
and FP. (A minimal sketch of the table-lookup mechanism involved follows the
diff below.)

Overall I think this produces better results in the modified test cases.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@294824 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 716500a20b0..f6fac236368 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -8863,10 +8863,6 @@ static const uint16_t ReplaceableInstrsAVX2[][3] = {
 { X86::VORPSYrr, X86::VORPDYrr, X86::VPORYrr },
 { X86::VXORPSYrm, X86::VXORPDYrm, X86::VPXORYrm },
 { X86::VXORPSYrr, X86::VXORPDYrr, X86::VPXORYrr },
- { X86::VEXTRACTF128mr, X86::VEXTRACTF128mr, X86::VEXTRACTI128mr },
- { X86::VEXTRACTF128rr, X86::VEXTRACTF128rr, X86::VEXTRACTI128rr },
- { X86::VINSERTF128rm, X86::VINSERTF128rm, X86::VINSERTI128rm },
- { X86::VINSERTF128rr, X86::VINSERTF128rr, X86::VINSERTI128rr },
 { X86::VPERM2F128rm, X86::VPERM2F128rm, X86::VPERM2I128rm },
 { X86::VPERM2F128rr, X86::VPERM2F128rr, X86::VPERM2I128rr },
 { X86::VBROADCASTSSrm, X86::VBROADCASTSSrm, X86::VPBROADCASTDrm},
@@ -8878,6 +8874,14 @@ static const uint16_t ReplaceableInstrsAVX2[][3] = {
 { X86::VBROADCASTF128, X86::VBROADCASTF128, X86::VBROADCASTI128 },
 };

+static const uint16_t ReplaceableInstrsAVX2InsertExtract[][3] = {
+ //PackedSingle PackedDouble PackedInt
+ { X86::VEXTRACTF128mr, X86::VEXTRACTF128mr, X86::VEXTRACTI128mr },
+ { X86::VEXTRACTF128rr, X86::VEXTRACTF128rr, X86::VEXTRACTI128rr },
+ { X86::VINSERTF128rm, X86::VINSERTF128rm, X86::VINSERTI128rm },
+ { X86::VINSERTF128rr, X86::VINSERTF128rr, X86::VINSERTI128rr },
+};
+
 static const uint16_t ReplaceableInstrsAVX512[][4] = {
 // Two integer columns for 64-bit and 32-bit elements.
 //PackedSingle PackedDouble PackedInt PackedInt
@@ -9139,6 +9143,12 @@ X86InstrInfo::getExecutionDomain(const MachineInstr &MI) const {
 validDomains = 0xe;
 } else if (lookup(opcode, domain, ReplaceableInstrsAVX2)) {
 validDomains = Subtarget.hasAVX2() ? 0xe : 0x6;
+ } else if (lookup(opcode, domain, ReplaceableInstrsAVX2InsertExtract)) {
+ // Insert/extract instructions should only affect the domain if AVX2
+ // is enabled.
+ if (!Subtarget.hasAVX2()) + return std::make_pair(0, 0); + validDomains = 0xe; } else if (lookupAVX512(opcode, domain, ReplaceableInstrsAVX512)) { validDomains = 0xe; } else if (Subtarget.hasDQI() && lookupAVX512(opcode, domain, @@ -9167,6 +9177,11 @@ void X86InstrInfo::setExecutionDomain(MachineInstr &MI, unsigned Domain) const { "256-bit vector operations only available in AVX2"); table = lookup(MI.getOpcode(), dom, ReplaceableInstrsAVX2); } + if (!table) { // try the other table + assert(Subtarget.hasAVX2() && + "256-bit insert/extract only available in AVX2"); + table = lookup(MI.getOpcode(), dom, ReplaceableInstrsAVX2InsertExtract); + } if (!table) { // try the AVX512 table assert(Subtarget.hasAVX512() && "Requires AVX-512"); table = lookupAVX512(MI.getOpcode(), dom, ReplaceableInstrsAVX512); diff --git a/test/CodeGen/X86/merge-consecutive-loads-256.ll b/test/CodeGen/X86/merge-consecutive-loads-256.ll index c18a98e05d1..f56f6182a36 100644 --- a/test/CodeGen/X86/merge-consecutive-loads-256.ll +++ b/test/CodeGen/X86/merge-consecutive-loads-256.ll @@ -625,7 +625,7 @@ define <4 x double> @merge_4f64_f64_34uz_volatile(double* %ptr) nounwind uwtable ; AVX1: # BB#0: ; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; AVX1-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0] -; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; @@ -650,7 +650,7 @@ define <4 x double> @merge_4f64_f64_34uz_volatile(double* %ptr) nounwind uwtable ; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; X32-AVX-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0] -; X32-AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; X32-AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; X32-AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; X32-AVX-NEXT: retl %ptr0 = getelementptr inbounds double, double* %ptr, i64 3 diff --git a/test/CodeGen/X86/nontemporal-loads.ll b/test/CodeGen/X86/nontemporal-loads.ll index 53f4e8d0437..eaab26ef954 100644 --- a/test/CodeGen/X86/nontemporal-loads.ll +++ b/test/CodeGen/X86/nontemporal-loads.ll @@ -752,7 +752,7 @@ define <8 x i32> @test_arg_v8i32(<8 x i32> %arg, <8 x i32>* %src) { ; ; AVX1-LABEL: test_arg_v8i32: ; AVX1: # BB#0: -; AVX1-NEXT: vmovaps (%rdi), %ymm1 +; AVX1-NEXT: vmovdqa (%rdi), %ymm1 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 ; AVX1-NEXT: vpaddd %xmm3, %xmm2, %xmm2 @@ -804,7 +804,7 @@ define <4 x i64> @test_arg_v4i64(<4 x i64> %arg, <4 x i64>* %src) { ; ; AVX1-LABEL: test_arg_v4i64: ; AVX1: # BB#0: -; AVX1-NEXT: vmovaps (%rdi), %ymm1 +; AVX1-NEXT: vmovdqa (%rdi), %ymm1 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 ; AVX1-NEXT: vpaddq %xmm3, %xmm2, %xmm2 @@ -835,7 +835,7 @@ define <16 x i16> @test_arg_v16i16(<16 x i16> %arg, <16 x i16>* %src) { ; ; AVX1-LABEL: test_arg_v16i16: ; AVX1: # BB#0: -; AVX1-NEXT: vmovaps (%rdi), %ymm1 +; AVX1-NEXT: vmovdqa (%rdi), %ymm1 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 ; AVX1-NEXT: vpaddw %xmm3, %xmm2, %xmm2 @@ -866,7 +866,7 @@ define <32 x i8> @test_arg_v32i8(<32 x i8> %arg, <32 x i8>* %src) { ; ; AVX1-LABEL: test_arg_v32i8: ; AVX1: # BB#0: -; AVX1-NEXT: vmovaps (%rdi), %ymm1 +; AVX1-NEXT: vmovdqa (%rdi), %ymm1 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 ; AVX1-NEXT: vpaddb %xmm3, %xmm2, %xmm2 @@ -925,8 +925,8 @@ define <16 x i32> @test_arg_v16i32(<16 x i32> %arg, <16 x i32>* %src) 
{ ; ; AVX1-LABEL: test_arg_v16i32: ; AVX1: # BB#0: -; AVX1-NEXT: vmovaps (%rdi), %ymm2 -; AVX1-NEXT: vmovaps 32(%rdi), %ymm3 +; AVX1-NEXT: vmovdqa (%rdi), %ymm2 +; AVX1-NEXT: vmovdqa 32(%rdi), %ymm3 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5 ; AVX1-NEXT: vpaddd %xmm5, %xmm4, %xmm4 @@ -989,8 +989,8 @@ define <8 x i64> @test_arg_v8i64(<8 x i64> %arg, <8 x i64>* %src) { ; ; AVX1-LABEL: test_arg_v8i64: ; AVX1: # BB#0: -; AVX1-NEXT: vmovaps (%rdi), %ymm2 -; AVX1-NEXT: vmovaps 32(%rdi), %ymm3 +; AVX1-NEXT: vmovdqa (%rdi), %ymm2 +; AVX1-NEXT: vmovdqa 32(%rdi), %ymm3 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5 ; AVX1-NEXT: vpaddq %xmm5, %xmm4, %xmm4 @@ -1029,8 +1029,8 @@ define <32 x i16> @test_arg_v32i16(<32 x i16> %arg, <32 x i16>* %src) { ; ; AVX1-LABEL: test_arg_v32i16: ; AVX1: # BB#0: -; AVX1-NEXT: vmovaps (%rdi), %ymm2 -; AVX1-NEXT: vmovaps 32(%rdi), %ymm3 +; AVX1-NEXT: vmovdqa (%rdi), %ymm2 +; AVX1-NEXT: vmovdqa 32(%rdi), %ymm3 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5 ; AVX1-NEXT: vpaddw %xmm5, %xmm4, %xmm4 @@ -1081,8 +1081,8 @@ define <64 x i8> @test_arg_v64i8(<64 x i8> %arg, <64 x i8>* %src) { ; ; AVX1-LABEL: test_arg_v64i8: ; AVX1: # BB#0: -; AVX1-NEXT: vmovaps (%rdi), %ymm2 -; AVX1-NEXT: vmovaps 32(%rdi), %ymm3 +; AVX1-NEXT: vmovdqa (%rdi), %ymm2 +; AVX1-NEXT: vmovdqa 32(%rdi), %ymm3 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5 ; AVX1-NEXT: vpaddb %xmm5, %xmm4, %xmm4 diff --git a/test/CodeGen/X86/oddshuffles.ll b/test/CodeGen/X86/oddshuffles.ll index 952db42842e..489fe944e74 100644 --- a/test/CodeGen/X86/oddshuffles.ll +++ b/test/CodeGen/X86/oddshuffles.ll @@ -923,7 +923,7 @@ define void @interleave_24i16_out(<24 x i16>* %p, <8 x i16>* %q1, <8 x i16>* %q2 ; AVX1-LABEL: interleave_24i16_out: ; AVX1: # BB#0: ; AVX1-NEXT: vmovdqu 32(%rdi), %xmm0 -; AVX1-NEXT: vmovups (%rdi), %ymm1 +; AVX1-NEXT: vmovdqu (%rdi), %ymm1 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 ; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm1[0],xmm2[1],xmm1[2,3],xmm2[4],xmm1[5,6],xmm2[7] ; AVX1-NEXT: vpshufb {{.*#+}} xmm3 = xmm3[0,1,6,7,12,13,2,3,8,9,14,15,12,13,14,15] @@ -1445,8 +1445,8 @@ define <2 x double> @wrongorder(<4 x double> %A, <8 x double>* %P) #0 { ; AVX1: # BB#0: ; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm1 -; AVX1-NEXT: vmovapd %ymm1, 32(%rdi) -; AVX1-NEXT: vmovapd %ymm1, (%rdi) +; AVX1-NEXT: vmovaps %ymm1, 32(%rdi) +; AVX1-NEXT: vmovaps %ymm1, (%rdi) ; AVX1-NEXT: # kill: %XMM0 %XMM0 %YMM0 ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq diff --git a/test/CodeGen/X86/psubus.ll b/test/CodeGen/X86/psubus.ll index 3b228283ace..9fe800a4e9e 100644 --- a/test/CodeGen/X86/psubus.ll +++ b/test/CodeGen/X86/psubus.ll @@ -213,7 +213,7 @@ define void @test7(i16* nocapture %head) nounwind { ; ; AVX1-LABEL: test7: ; AVX1: ## BB#0: ## %vector.ph -; AVX1-NEXT: vmovups (%rdi), %ymm0 +; AVX1-NEXT: vmovdqu (%rdi), %ymm0 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX1-NEXT: vpcmpgtw %xmm1, %xmm2, %xmm1 @@ -257,13 +257,13 @@ define void @test8(i16* nocapture %head) nounwind { ; ; AVX1-LABEL: test8: ; AVX1: ## BB#0: ## %vector.ph -; AVX1-NEXT: vmovups (%rdi), %ymm0 +; AVX1-NEXT: vmovdqu (%rdi), %ymm0 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX1-NEXT: vmovaps {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768] -; AVX1-NEXT: vxorps %xmm2, %xmm1, %xmm3 +; AVX1-NEXT: 
vmovdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768] +; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm3 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [65534,65534,65534,65534,65534,65534,65534,65534] ; AVX1-NEXT: vpcmpgtw %xmm4, %xmm3, %xmm3 -; AVX1-NEXT: vxorps %xmm2, %xmm0, %xmm2 +; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm2 ; AVX1-NEXT: vpcmpgtw %xmm4, %xmm2, %xmm2 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [32769,32769,32769,32769,32769,32769,32769,32769] @@ -310,7 +310,7 @@ define void @test9(i16* nocapture %head, i16 zeroext %w) nounwind { ; ; AVX1-LABEL: test9: ; AVX1: ## BB#0: ## %vector.ph -; AVX1-NEXT: vmovups (%rdi), %ymm0 +; AVX1-NEXT: vmovdqu (%rdi), %ymm0 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX1-NEXT: vmovd %esi, %xmm2 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,0,0,0,4,5,6,7] @@ -364,7 +364,7 @@ define void @test10(i8* nocapture %head) nounwind { ; ; AVX1-LABEL: test10: ; AVX1: ## BB#0: ## %vector.ph -; AVX1-NEXT: vmovups (%rdi), %ymm0 +; AVX1-NEXT: vmovdqu (%rdi), %ymm0 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX1-NEXT: vpcmpgtb %xmm1, %xmm2, %xmm1 @@ -409,13 +409,13 @@ define void @test11(i8* nocapture %head) nounwind { ; ; AVX1-LABEL: test11: ; AVX1: ## BB#0: ## %vector.ph -; AVX1-NEXT: vmovups (%rdi), %ymm0 +; AVX1-NEXT: vmovdqu (%rdi), %ymm0 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX1-NEXT: vmovaps {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; AVX1-NEXT: vxorps %xmm2, %xmm1, %xmm3 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] +; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm3 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [254,254,254,254,254,254,254,254,254,254,254,254,254,254,254,254] ; AVX1-NEXT: vpcmpgtb %xmm4, %xmm3, %xmm3 -; AVX1-NEXT: vxorps %xmm2, %xmm0, %xmm2 +; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm2 ; AVX1-NEXT: vpcmpgtb %xmm4, %xmm2, %xmm2 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [129,129,129,129,129,129,129,129,129,129,129,129,129,129,129,129] @@ -475,7 +475,7 @@ define void @test12(i8* nocapture %head, i8 zeroext %w) nounwind { ; ; AVX1-LABEL: test12: ; AVX1: ## BB#0: ## %vector.ph -; AVX1-NEXT: vmovups (%rdi), %ymm0 +; AVX1-NEXT: vmovdqu (%rdi), %ymm0 ; AVX1-NEXT: vmovd %esi, %xmm1 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 @@ -589,7 +589,7 @@ define void @test13(i16* nocapture %head, i32* nocapture %w) nounwind { ; ; AVX1-LABEL: test13: ; AVX1: ## BB#0: ## %vector.ph -; AVX1-NEXT: vmovups (%rsi), %ymm0 +; AVX1-NEXT: vmovdqu (%rsi), %ymm0 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648] @@ -598,7 +598,7 @@ define void @test13(i16* nocapture %head, i32* nocapture %w) nounwind { ; AVX1-NEXT: vpxor %xmm3, %xmm5, %xmm6 ; AVX1-NEXT: vpcmpgtd %xmm4, %xmm6, %xmm4 ; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm6 -; AVX1-NEXT: vxorps %xmm3, %xmm0, %xmm3 +; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm3 ; AVX1-NEXT: vpcmpgtd %xmm6, %xmm3, %xmm3 ; AVX1-NEXT: vpacksswb %xmm4, %xmm3, %xmm3 ; AVX1-NEXT: vpsubd %xmm0, %xmm1, %xmm0 @@ -791,8 +791,8 @@ define void @test14(i8* nocapture %head, i32* nocapture %w) nounwind { ; ; AVX1-LABEL: test14: ; AVX1: ## BB#0: ## %vector.ph -; AVX1-NEXT: vmovups (%rsi), %ymm0 -; AVX1-NEXT: vmovups 
32(%rsi), %ymm1 +; AVX1-NEXT: vmovdqu (%rsi), %ymm0 +; AVX1-NEXT: vmovdqu 32(%rsi), %ymm1 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm8 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm9 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm10 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero @@ -803,17 +803,17 @@ define void @test14(i8* nocapture %head, i32* nocapture %w) nounwind { ; AVX1-NEXT: vpxor %xmm6, %xmm2, %xmm3 ; AVX1-NEXT: vpcmpgtd %xmm7, %xmm3, %xmm3 ; AVX1-NEXT: vpxor %xmm6, %xmm10, %xmm7 -; AVX1-NEXT: vxorps %xmm6, %xmm1, %xmm4 +; AVX1-NEXT: vpxor %xmm6, %xmm1, %xmm4 ; AVX1-NEXT: vpcmpgtd %xmm7, %xmm4, %xmm4 ; AVX1-NEXT: vpacksswb %xmm3, %xmm4, %xmm3 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm11 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u> ; AVX1-NEXT: vpshufb %xmm11, %xmm3, %xmm12 ; AVX1-NEXT: vpxor %xmm6, %xmm9, %xmm7 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 -; AVX1-NEXT: vxorps %xmm6, %xmm4, %xmm3 +; AVX1-NEXT: vpxor %xmm6, %xmm4, %xmm3 ; AVX1-NEXT: vpcmpgtd %xmm7, %xmm3, %xmm3 ; AVX1-NEXT: vpxor %xmm6, %xmm8, %xmm7 -; AVX1-NEXT: vxorps %xmm6, %xmm0, %xmm6 +; AVX1-NEXT: vpxor %xmm6, %xmm0, %xmm6 ; AVX1-NEXT: vpcmpgtd %xmm7, %xmm6, %xmm6 ; AVX1-NEXT: vpacksswb %xmm3, %xmm6, %xmm3 ; AVX1-NEXT: vpshufb %xmm11, %xmm3, %xmm3 @@ -966,7 +966,7 @@ define void @test15(i16* nocapture %head, i32* nocapture %w) nounwind { ; ; AVX1-LABEL: test15: ; AVX1: ## BB#0: ## %vector.ph -; AVX1-NEXT: vmovups (%rsi), %ymm0 +; AVX1-NEXT: vmovdqu (%rsi), %ymm0 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648] @@ -975,7 +975,7 @@ define void @test15(i16* nocapture %head, i32* nocapture %w) nounwind { ; AVX1-NEXT: vpxor %xmm3, %xmm5, %xmm6 ; AVX1-NEXT: vpcmpgtd %xmm6, %xmm4, %xmm4 ; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm6 -; AVX1-NEXT: vxorps %xmm3, %xmm0, %xmm3 +; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm3 ; AVX1-NEXT: vpcmpgtd %xmm3, %xmm6, %xmm3 ; AVX1-NEXT: vpacksswb %xmm4, %xmm3, %xmm3 ; AVX1-NEXT: vpsubd %xmm0, %xmm1, %xmm0 @@ -1096,7 +1096,7 @@ define void @test16(i16* nocapture %head, i32* nocapture %w) nounwind { ; ; AVX1-LABEL: test16: ; AVX1: ## BB#0: ## %vector.ph -; AVX1-NEXT: vmovups (%rsi), %ymm0 +; AVX1-NEXT: vmovdqu (%rsi), %ymm0 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648] @@ -1105,7 +1105,7 @@ define void @test16(i16* nocapture %head, i32* nocapture %w) nounwind { ; AVX1-NEXT: vpxor %xmm3, %xmm5, %xmm6 ; AVX1-NEXT: vpcmpgtd %xmm6, %xmm4, %xmm4 ; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm6 -; AVX1-NEXT: vxorps %xmm3, %xmm0, %xmm3 +; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm3 ; AVX1-NEXT: vpcmpgtd %xmm3, %xmm6, %xmm3 ; AVX1-NEXT: vpacksswb %xmm4, %xmm3, %xmm3 ; AVX1-NEXT: vpsubd %xmm0, %xmm1, %xmm0 diff --git a/test/CodeGen/X86/subvector-broadcast.ll b/test/CodeGen/X86/subvector-broadcast.ll index 5082101a6d4..21817c4654b 100644 --- a/test/CodeGen/X86/subvector-broadcast.ll +++ b/test/CodeGen/X86/subvector-broadcast.ll @@ -1298,7 +1298,7 @@ define void @fallback_broadcast_v4i64_to_v8i64(<4 x i64> %a, <8 x i64> %b) { ; 
X32-AVX1-LABEL: fallback_broadcast_v4i64_to_v8i64: ; X32-AVX1: ## BB#0: ## %entry ; X32-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 -; X32-AVX1-NEXT: vmovaps {{.*#+}} ymm4 = [1,0,2,0,3,0,4,0] +; X32-AVX1-NEXT: vmovdqa {{.*#+}} ymm4 = [1,0,2,0,3,0,4,0] ; X32-AVX1-NEXT: vextractf128 $1, %ymm4, %xmm5 ; X32-AVX1-NEXT: vpaddq %xmm5, %xmm3, %xmm3 ; X32-AVX1-NEXT: vpaddq %xmm4, %xmm0, %xmm0 diff --git a/test/CodeGen/X86/unaligned-32-byte-memops.ll b/test/CodeGen/X86/unaligned-32-byte-memops.ll index b9deb058cb3..391f7a38a37 100644 --- a/test/CodeGen/X86/unaligned-32-byte-memops.ll +++ b/test/CodeGen/X86/unaligned-32-byte-memops.ll @@ -254,7 +254,7 @@ define <32 x i8> @combine_16_byte_loads_i8(<16 x i8>* %ptr, <32 x i8> %x) { define <4 x double> @combine_16_byte_loads_double(<2 x double>* %ptr, <4 x double> %x) { ; AVXSLOW-LABEL: combine_16_byte_loads_double: ; AVXSLOW: # BB#0: -; AVXSLOW-NEXT: vmovupd 144(%rdi), %xmm1 +; AVXSLOW-NEXT: vmovups 144(%rdi), %xmm1 ; AVXSLOW-NEXT: vinsertf128 $1, 160(%rdi), %ymm1, %ymm1 ; AVXSLOW-NEXT: vaddpd %ymm0, %ymm1, %ymm0 ; AVXSLOW-NEXT: retq diff --git a/test/CodeGen/X86/vec_fptrunc.ll b/test/CodeGen/X86/vec_fptrunc.ll index 841ac8a44da..e6a0d52c5ae 100644 --- a/test/CodeGen/X86/vec_fptrunc.ll +++ b/test/CodeGen/X86/vec_fptrunc.ll @@ -102,7 +102,7 @@ define void @fptrunc_frommem8(<8 x double>* %in, <8 x float>* %out) { ; X32-AVX-NEXT: vcvtpd2psy (%ecx), %xmm0 ; X32-AVX-NEXT: vcvtpd2psy 32(%ecx), %xmm1 ; X32-AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; X32-AVX-NEXT: vmovupd %ymm0, (%eax) +; X32-AVX-NEXT: vmovups %ymm0, (%eax) ; X32-AVX-NEXT: vzeroupper ; X32-AVX-NEXT: retl ; @@ -123,7 +123,7 @@ define void @fptrunc_frommem8(<8 x double>* %in, <8 x float>* %out) { ; X64-AVX-NEXT: vcvtpd2psy (%rdi), %xmm0 ; X64-AVX-NEXT: vcvtpd2psy 32(%rdi), %xmm1 ; X64-AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; X64-AVX-NEXT: vmovupd %ymm0, (%rsi) +; X64-AVX-NEXT: vmovups %ymm0, (%rsi) ; X64-AVX-NEXT: vzeroupper ; X64-AVX-NEXT: retq entry: diff --git a/test/CodeGen/X86/vec_int_to_fp.ll b/test/CodeGen/X86/vec_int_to_fp.ll index 923af1216d0..d3f286d9fae 100644 --- a/test/CodeGen/X86/vec_int_to_fp.ll +++ b/test/CodeGen/X86/vec_int_to_fp.ll @@ -2723,7 +2723,7 @@ define <4 x double> @sitofp_load_4i64_to_4f64(<4 x i64> *%a) { ; ; AVX1-LABEL: sitofp_load_4i64_to_4f64: ; AVX1: # BB#0: -; AVX1-NEXT: vmovaps (%rdi), %ymm0 +; AVX1-NEXT: vmovdqa (%rdi), %ymm0 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX1-NEXT: vpextrq $1, %xmm1, %rax ; AVX1-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm2 @@ -3130,7 +3130,7 @@ define <4 x double> @uitofp_load_4i64_to_4f64(<4 x i64> *%a) { ; ; AVX1-LABEL: uitofp_load_4i64_to_4f64: ; AVX1: # BB#0: -; AVX1-NEXT: vmovaps (%rdi), %ymm0 +; AVX1-NEXT: vmovdqa (%rdi), %ymm0 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [1127219200,1160773632,0,0] ; AVX1-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] diff --git a/test/CodeGen/X86/vec_minmax_uint.ll b/test/CodeGen/X86/vec_minmax_uint.ll index d9b6e12e409..ec5f83ea396 100644 --- a/test/CodeGen/X86/vec_minmax_uint.ll +++ b/test/CodeGen/X86/vec_minmax_uint.ll @@ -167,13 +167,13 @@ define <4 x i64> @max_gt_v4i64(<4 x i64> %a, <4 x i64> %b) { ; AVX1-LABEL: max_gt_v4i64: ; AVX1: # BB#0: ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 -; AVX1-NEXT: vmovaps {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808] -; AVX1-NEXT: vxorps %xmm3, %xmm2, %xmm2 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808] +; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2 ; 
AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 -; AVX1-NEXT: vxorps %xmm3, %xmm4, %xmm4 +; AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm4 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2 -; AVX1-NEXT: vxorps %xmm3, %xmm1, %xmm4 -; AVX1-NEXT: vxorps %xmm3, %xmm0, %xmm3 +; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm4 +; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm3 ; AVX1-NEXT: vpcmpgtq %xmm4, %xmm3, %xmm3 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2 ; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 @@ -604,15 +604,15 @@ define <4 x i64> @max_ge_v4i64(<4 x i64> %a, <4 x i64> %b) { ; AVX1-LABEL: max_ge_v4i64: ; AVX1: # BB#0: ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 -; AVX1-NEXT: vmovaps {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808] -; AVX1-NEXT: vxorps %xmm3, %xmm2, %xmm2 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808] +; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4 -; AVX1-NEXT: vxorps %xmm3, %xmm4, %xmm4 +; AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm4 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2 ; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4 ; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2 -; AVX1-NEXT: vxorps %xmm3, %xmm0, %xmm5 -; AVX1-NEXT: vxorps %xmm3, %xmm1, %xmm3 +; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm5 +; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm3 ; AVX1-NEXT: vpcmpgtq %xmm5, %xmm3, %xmm3 ; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm3 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2 @@ -1031,13 +1031,13 @@ define <4 x i64> @min_lt_v4i64(<4 x i64> %a, <4 x i64> %b) { ; AVX1-LABEL: min_lt_v4i64: ; AVX1: # BB#0: ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 -; AVX1-NEXT: vmovaps {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808] -; AVX1-NEXT: vxorps %xmm3, %xmm2, %xmm2 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808] +; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4 -; AVX1-NEXT: vxorps %xmm3, %xmm4, %xmm4 +; AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm4 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2 -; AVX1-NEXT: vxorps %xmm3, %xmm0, %xmm4 -; AVX1-NEXT: vxorps %xmm3, %xmm1, %xmm3 +; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm4 +; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm3 ; AVX1-NEXT: vpcmpgtq %xmm4, %xmm3, %xmm3 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2 ; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 @@ -1467,15 +1467,15 @@ define <4 x i64> @min_le_v4i64(<4 x i64> %a, <4 x i64> %b) { ; AVX1-LABEL: min_le_v4i64: ; AVX1: # BB#0: ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 -; AVX1-NEXT: vmovaps {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808] -; AVX1-NEXT: vxorps %xmm3, %xmm2, %xmm2 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808] +; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 -; AVX1-NEXT: vxorps %xmm3, %xmm4, %xmm4 +; AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm4 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2 ; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4 ; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2 -; AVX1-NEXT: vxorps %xmm3, %xmm1, %xmm5 -; AVX1-NEXT: vxorps %xmm3, %xmm0, %xmm3 +; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm5 +; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm3 ; AVX1-NEXT: vpcmpgtq %xmm5, %xmm3, %xmm3 ; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm3 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2 diff --git a/test/CodeGen/X86/vector-bitreverse.ll b/test/CodeGen/X86/vector-bitreverse.ll index beec58bdaf7..226c0adbaf3 100644 --- a/test/CodeGen/X86/vector-bitreverse.ll +++ b/test/CodeGen/X86/vector-bitreverse.ll @@ -613,8 +613,8 @@ define <32 x i8> @test_bitreverse_v32i8(<32 x i8> %a) nounwind { ; 
AVX1-LABEL: test_bitreverse_v32i8: ; AVX1: # BB#0: ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX1-NEXT: vmovaps {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX1-NEXT: vandps %xmm2, %xmm1, %xmm3 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240] ; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3 ; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1 @@ -622,7 +622,7 @@ define <32 x i8> @test_bitreverse_v32i8(<32 x i8> %a) nounwind { ; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15] ; AVX1-NEXT: vpshufb %xmm1, %xmm5, %xmm1 ; AVX1-NEXT: vpor %xmm1, %xmm3, %xmm1 -; AVX1-NEXT: vandps %xmm2, %xmm0, %xmm3 +; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm3 ; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 ; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 @@ -1361,8 +1361,8 @@ define <64 x i8> @test_bitreverse_v64i8(<64 x i8> %a) nounwind { ; AVX1-LABEL: test_bitreverse_v64i8: ; AVX1: # BB#0: ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 -; AVX1-NEXT: vmovaps {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm4 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240] ; AVX1-NEXT: vpshufb %xmm4, %xmm5, %xmm4 ; AVX1-NEXT: vpsrlw $4, %xmm2, %xmm2 @@ -1370,7 +1370,7 @@ define <64 x i8> @test_bitreverse_v64i8(<64 x i8> %a) nounwind { ; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15] ; AVX1-NEXT: vpshufb %xmm2, %xmm6, %xmm2 ; AVX1-NEXT: vpor %xmm2, %xmm4, %xmm2 -; AVX1-NEXT: vandps %xmm3, %xmm0, %xmm4 +; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm4 ; AVX1-NEXT: vpshufb %xmm4, %xmm5, %xmm4 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 ; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0 @@ -1378,13 +1378,13 @@ define <64 x i8> @test_bitreverse_v64i8(<64 x i8> %a) nounwind { ; AVX1-NEXT: vpor %xmm0, %xmm4, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 -; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm4 +; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX1-NEXT: vpshufb %xmm4, %xmm5, %xmm4 ; AVX1-NEXT: vpsrlw $4, %xmm2, %xmm2 ; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2 ; AVX1-NEXT: vpshufb %xmm2, %xmm6, %xmm2 ; AVX1-NEXT: vpor %xmm2, %xmm4, %xmm2 -; AVX1-NEXT: vandps %xmm3, %xmm1, %xmm4 +; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm4 ; AVX1-NEXT: vpshufb %xmm4, %xmm5, %xmm4 ; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1 ; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1 diff --git a/test/CodeGen/X86/vector-compare-results.ll b/test/CodeGen/X86/vector-compare-results.ll index c34f333ef78..06dc1eae586 100644 --- a/test/CodeGen/X86/vector-compare-results.ll +++ b/test/CodeGen/X86/vector-compare-results.ll @@ -10187,24 +10187,24 @@ define <32 x i1> @test_cmp_v32i64(<32 x i64> %a0, <32 x i64> %a1) nounwind { ; AVX1-NEXT: movq %rsp, %rbp ; AVX1-NEXT: andq $-32, %rsp ; AVX1-NEXT: subq $32, %rsp -; AVX1-NEXT: vmovaps 240(%rbp), %ymm8 +; AVX1-NEXT: vmovdqa 240(%rbp), %ymm8 ; AVX1-NEXT: vextractf128 $1, %ymm8, %xmm9 ; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm10 ; AVX1-NEXT: vpcmpgtq %xmm9, %xmm10, %xmm9 -; AVX1-NEXT: vmovaps 208(%rbp), %ymm10 +; AVX1-NEXT: vmovdqa 208(%rbp), %ymm10 ; AVX1-NEXT: vpcmpgtq %xmm8, %xmm7, %xmm7 ; AVX1-NEXT: vpacksswb %xmm9, %xmm7, %xmm8 ; AVX1-NEXT: vextractf128 $1, %ymm10, %xmm9 
; AVX1-NEXT: vextractf128 $1, %ymm6, %xmm7 ; AVX1-NEXT: vpcmpgtq %xmm9, %xmm7, %xmm7 ; AVX1-NEXT: vpcmpgtq %xmm10, %xmm6, %xmm6 -; AVX1-NEXT: vmovaps 176(%rbp), %ymm9 +; AVX1-NEXT: vmovdqa 176(%rbp), %ymm9 ; AVX1-NEXT: vpacksswb %xmm7, %xmm6, %xmm6 ; AVX1-NEXT: vpacksswb %xmm8, %xmm6, %xmm8 ; AVX1-NEXT: vextractf128 $1, %ymm9, %xmm7 ; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm6 ; AVX1-NEXT: vpcmpgtq %xmm7, %xmm6, %xmm6 -; AVX1-NEXT: vmovaps 144(%rbp), %ymm10 +; AVX1-NEXT: vmovdqa 144(%rbp), %ymm10 ; AVX1-NEXT: vpcmpgtq %xmm9, %xmm5, %xmm5 ; AVX1-NEXT: vpacksswb %xmm6, %xmm5, %xmm5 ; AVX1-NEXT: vextractf128 $1, %ymm10, %xmm6 @@ -10212,26 +10212,26 @@ define <32 x i1> @test_cmp_v32i64(<32 x i64> %a0, <32 x i64> %a1) nounwind { ; AVX1-NEXT: vpcmpgtq %xmm6, %xmm7, %xmm6 ; AVX1-NEXT: vpcmpgtq %xmm10, %xmm4, %xmm4 ; AVX1-NEXT: vpacksswb %xmm6, %xmm4, %xmm4 -; AVX1-NEXT: vmovaps 112(%rbp), %ymm6 +; AVX1-NEXT: vmovdqa 112(%rbp), %ymm6 ; AVX1-NEXT: vpacksswb %xmm5, %xmm4, %xmm4 ; AVX1-NEXT: vpacksswb %xmm8, %xmm4, %xmm4 ; AVX1-NEXT: vextractf128 $1, %ymm6, %xmm5 ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm7 ; AVX1-NEXT: vpcmpgtq %xmm5, %xmm7, %xmm5 -; AVX1-NEXT: vmovaps 80(%rbp), %ymm7 +; AVX1-NEXT: vmovdqa 80(%rbp), %ymm7 ; AVX1-NEXT: vpcmpgtq %xmm6, %xmm3, %xmm3 ; AVX1-NEXT: vpacksswb %xmm5, %xmm3, %xmm3 ; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm5 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm6 ; AVX1-NEXT: vpcmpgtq %xmm5, %xmm6, %xmm5 ; AVX1-NEXT: vpcmpgtq %xmm7, %xmm2, %xmm2 -; AVX1-NEXT: vmovaps 48(%rbp), %ymm6 +; AVX1-NEXT: vmovdqa 48(%rbp), %ymm6 ; AVX1-NEXT: vpacksswb %xmm5, %xmm2, %xmm2 ; AVX1-NEXT: vpacksswb %xmm3, %xmm2, %xmm2 ; AVX1-NEXT: vextractf128 $1, %ymm6, %xmm3 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5 ; AVX1-NEXT: vpcmpgtq %xmm3, %xmm5, %xmm3 -; AVX1-NEXT: vmovaps 16(%rbp), %ymm5 +; AVX1-NEXT: vmovdqa 16(%rbp), %ymm5 ; AVX1-NEXT: vpcmpgtq %xmm6, %xmm1, %xmm1 ; AVX1-NEXT: vpacksswb %xmm3, %xmm1, %xmm1 ; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm3 diff --git a/test/CodeGen/X86/vector-half-conversions.ll b/test/CodeGen/X86/vector-half-conversions.ll index 5bf6fbeb623..c870022b26d 100644 --- a/test/CodeGen/X86/vector-half-conversions.ll +++ b/test/CodeGen/X86/vector-half-conversions.ll @@ -4315,7 +4315,7 @@ define <4 x i16> @cvt_4f64_to_4i16(<4 x double> %a0) nounwind { ; AVX1-NEXT: callq __truncdfhf2 ; AVX1-NEXT: movzwl %ax, %r14d ; AVX1-NEXT: orl %ebx, %r14d -; AVX1-NEXT: vmovupd (%rsp), %ymm0 # 32-byte Reload +; AVX1-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 ; AVX1-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill ; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] @@ -4462,7 +4462,7 @@ define <8 x i16> @cvt_4f64_to_8i16_undef(<4 x double> %a0) nounwind { ; AVX1-NEXT: callq __truncdfhf2 ; AVX1-NEXT: movzwl %ax, %r14d ; AVX1-NEXT: orl %ebx, %r14d -; AVX1-NEXT: vmovupd (%rsp), %ymm0 # 32-byte Reload +; AVX1-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 ; AVX1-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill ; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] @@ -4616,7 +4616,7 @@ define <8 x i16> @cvt_4f64_to_8i16_zero(<4 x double> %a0) nounwind { ; AVX1-NEXT: callq __truncdfhf2 ; AVX1-NEXT: movzwl %ax, %r14d ; AVX1-NEXT: orl %ebx, %r14d -; AVX1-NEXT: vmovupd (%rsp), %ymm0 # 32-byte Reload +; AVX1-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 ; AVX1-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill ; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] @@ -4774,7 +4774,7 @@ define <8 x i16> 
@cvt_8f64_to_8i16(<8 x double> %a0) nounwind { ; AVX1-NEXT: callq __truncdfhf2 ; AVX1-NEXT: movzwl %ax, %r15d ; AVX1-NEXT: orl %ebx, %r15d -; AVX1-NEXT: vmovupd {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload +; AVX1-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 ; AVX1-NEXT: vmovapd %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill ; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] @@ -4799,7 +4799,7 @@ define <8 x i16> @cvt_8f64_to_8i16(<8 x double> %a0) nounwind { ; AVX1-NEXT: callq __truncdfhf2 ; AVX1-NEXT: movzwl %ax, %r15d ; AVX1-NEXT: orl %ebx, %r15d -; AVX1-NEXT: vmovupd (%rsp), %ymm0 # 32-byte Reload +; AVX1-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 ; AVX1-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill ; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] @@ -5077,7 +5077,7 @@ define void @store_cvt_4f64_to_4i16(<4 x double> %a0, <4 x i16>* %a1) nounwind { ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: callq __truncdfhf2 ; AVX1-NEXT: movl %eax, %r14d -; AVX1-NEXT: vmovupd {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload +; AVX1-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 ; AVX1-NEXT: vmovapd %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill ; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] @@ -5235,7 +5235,7 @@ define void @store_cvt_4f64_to_8i16_undef(<4 x double> %a0, <8 x i16>* %a1) noun ; AVX1-NEXT: callq __truncdfhf2 ; AVX1-NEXT: movzwl %ax, %ebx ; AVX1-NEXT: orl %ebp, %ebx -; AVX1-NEXT: vmovupd (%rsp), %ymm0 # 32-byte Reload +; AVX1-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 ; AVX1-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill ; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] @@ -5406,7 +5406,7 @@ define void @store_cvt_4f64_to_8i16_zero(<4 x double> %a0, <8 x i16>* %a1) nounw ; AVX1-NEXT: callq __truncdfhf2 ; AVX1-NEXT: movzwl %ax, %ebx ; AVX1-NEXT: orl %ebp, %ebx -; AVX1-NEXT: vmovupd (%rsp), %ymm0 # 32-byte Reload +; AVX1-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 ; AVX1-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill ; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] @@ -5576,7 +5576,7 @@ define void @store_cvt_8f64_to_8i16(<8 x double> %a0, <8 x i16>* %a1) nounwind { ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: callq __truncdfhf2 ; AVX1-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill -; AVX1-NEXT: vmovupd {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload +; AVX1-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 ; AVX1-NEXT: vmovapd %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill ; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] @@ -5587,7 +5587,7 @@ define void @store_cvt_8f64_to_8i16(<8 x double> %a0, <8 x i16>* %a1) nounwind { ; AVX1-NEXT: # xmm0 = mem[1,0] ; AVX1-NEXT: callq __truncdfhf2 ; AVX1-NEXT: movl %eax, %r12d -; AVX1-NEXT: vmovupd {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload +; AVX1-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 ; AVX1-NEXT: vmovapd %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill ; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] diff --git a/test/CodeGen/X86/vector-idiv-sdiv-256.ll b/test/CodeGen/X86/vector-idiv-sdiv-256.ll index a1727ea4f70..8cdab462806 100644 --- a/test/CodeGen/X86/vector-idiv-sdiv-256.ll +++ b/test/CodeGen/X86/vector-idiv-sdiv-256.ll @@ -87,7 +87,7 @@ define <4 x i64> @test_div7_4i64(<4 x i64> %a) nounwind { define <8 x i32> @test_div7_8i32(<8 x i32> %a) nounwind { ; AVX1-LABEL: 
test_div7_8i32: ; AVX1: # BB#0: -; AVX1-NEXT: vmovaps {{.*#+}} ymm1 = [2454267027,2454267027,2454267027,2454267027,2454267027,2454267027,2454267027,2454267027] +; AVX1-NEXT: vmovdqa {{.*#+}} ymm1 = [2454267027,2454267027,2454267027,2454267027,2454267027,2454267027,2454267027,2454267027] ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[1,1,3,3] ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 @@ -348,7 +348,7 @@ define <4 x i64> @test_rem7_4i64(<4 x i64> %a) nounwind { define <8 x i32> @test_rem7_8i32(<8 x i32> %a) nounwind { ; AVX1-LABEL: test_rem7_8i32: ; AVX1: # BB#0: -; AVX1-NEXT: vmovaps {{.*#+}} ymm1 = [2454267027,2454267027,2454267027,2454267027,2454267027,2454267027,2454267027,2454267027] +; AVX1-NEXT: vmovdqa {{.*#+}} ymm1 = [2454267027,2454267027,2454267027,2454267027,2454267027,2454267027,2454267027,2454267027] ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[1,1,3,3] ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 diff --git a/test/CodeGen/X86/vector-idiv-udiv-256.ll b/test/CodeGen/X86/vector-idiv-udiv-256.ll index c11ee22d647..7a2f99730e2 100644 --- a/test/CodeGen/X86/vector-idiv-udiv-256.ll +++ b/test/CodeGen/X86/vector-idiv-udiv-256.ll @@ -359,7 +359,7 @@ define <4 x i64> @test_rem7_4i64(<4 x i64> %a) nounwind { define <8 x i32> @test_rem7_8i32(<8 x i32> %a) nounwind { ; AVX1-LABEL: test_rem7_8i32: ; AVX1: # BB#0: -; AVX1-NEXT: vmovaps {{.*#+}} ymm1 = [613566757,613566757,613566757,613566757,613566757,613566757,613566757,613566757] +; AVX1-NEXT: vmovdqa {{.*#+}} ymm1 = [613566757,613566757,613566757,613566757,613566757,613566757,613566757,613566757] ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[1,1,3,3] ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 diff --git a/test/CodeGen/X86/vector-lzcnt-256.ll b/test/CodeGen/X86/vector-lzcnt-256.ll index 04f74c94759..53cb4d8e445 100644 --- a/test/CodeGen/X86/vector-lzcnt-256.ll +++ b/test/CodeGen/X86/vector-lzcnt-256.ll @@ -11,8 +11,8 @@ define <4 x i64> @testv4i64(<4 x i64> %in) nounwind { ; AVX1-LABEL: testv4i64: ; AVX1: # BB#0: ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 -; AVX1-NEXT: vmovaps {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm1 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm1 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0] ; AVX1-NEXT: vpshufb %xmm1, %xmm4, %xmm5 ; AVX1-NEXT: vpsrlw $4, %xmm2, %xmm1 @@ -37,7 +37,7 @@ define <4 x i64> @testv4i64(<4 x i64> %in) nounwind { ; AVX1-NEXT: vpand %xmm2, %xmm5, %xmm2 ; AVX1-NEXT: vpsrlq $32, %xmm5, %xmm5 ; AVX1-NEXT: vpaddq %xmm2, %xmm5, %xmm2 -; AVX1-NEXT: vandps %xmm3, %xmm0, %xmm5 +; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm5 ; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm5 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm6 ; AVX1-NEXT: vpand %xmm3, %xmm6, %xmm3 @@ -143,8 +143,8 @@ define <4 x i64> @testv4i64u(<4 x i64> %in) nounwind { ; AVX1-LABEL: testv4i64u: ; AVX1: # BB#0: ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 -; AVX1-NEXT: vmovaps {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm1 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm1 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0] ; AVX1-NEXT: vpshufb %xmm1, %xmm4, %xmm5 ; AVX1-NEXT: vpsrlw $4, %xmm2, %xmm1 @@ -169,7 +169,7 @@ define <4 x i64> 
@testv4i64u(<4 x i64> %in) nounwind { ; AVX1-NEXT: vpand %xmm2, %xmm5, %xmm2 ; AVX1-NEXT: vpsrlq $32, %xmm5, %xmm5 ; AVX1-NEXT: vpaddq %xmm2, %xmm5, %xmm2 -; AVX1-NEXT: vandps %xmm3, %xmm0, %xmm5 +; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm5 ; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm5 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm6 ; AVX1-NEXT: vpand %xmm3, %xmm6, %xmm3 @@ -275,8 +275,8 @@ define <8 x i32> @testv8i32(<8 x i32> %in) nounwind { ; AVX1-LABEL: testv8i32: ; AVX1: # BB#0: ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX1-NEXT: vmovaps {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX1-NEXT: vandps %xmm2, %xmm1, %xmm3 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0] ; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3 ; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm5 @@ -296,7 +296,7 @@ define <8 x i32> @testv8i32(<8 x i32> %in) nounwind { ; AVX1-NEXT: vpand %xmm1, %xmm3, %xmm1 ; AVX1-NEXT: vpsrld $16, %xmm3, %xmm3 ; AVX1-NEXT: vpaddd %xmm1, %xmm3, %xmm1 -; AVX1-NEXT: vandps %xmm2, %xmm0, %xmm3 +; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm3 ; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm5 ; AVX1-NEXT: vpand %xmm2, %xmm5, %xmm2 @@ -387,8 +387,8 @@ define <8 x i32> @testv8i32u(<8 x i32> %in) nounwind { ; AVX1-LABEL: testv8i32u: ; AVX1: # BB#0: ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX1-NEXT: vmovaps {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX1-NEXT: vandps %xmm2, %xmm1, %xmm3 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0] ; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3 ; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm5 @@ -408,7 +408,7 @@ define <8 x i32> @testv8i32u(<8 x i32> %in) nounwind { ; AVX1-NEXT: vpand %xmm1, %xmm3, %xmm1 ; AVX1-NEXT: vpsrld $16, %xmm3, %xmm3 ; AVX1-NEXT: vpaddd %xmm1, %xmm3, %xmm1 -; AVX1-NEXT: vandps %xmm2, %xmm0, %xmm3 +; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm3 ; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm5 ; AVX1-NEXT: vpand %xmm2, %xmm5, %xmm2 @@ -499,8 +499,8 @@ define <16 x i16> @testv16i16(<16 x i16> %in) nounwind { ; AVX1-LABEL: testv16i16: ; AVX1: # BB#0: ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX1-NEXT: vmovaps {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX1-NEXT: vandps %xmm2, %xmm1, %xmm3 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0] ; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3 ; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm5 @@ -515,7 +515,7 @@ define <16 x i16> @testv16i16(<16 x i16> %in) nounwind { ; AVX1-NEXT: vpand %xmm1, %xmm3, %xmm1 ; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3 ; AVX1-NEXT: vpaddw %xmm1, %xmm3, %xmm1 -; AVX1-NEXT: vandps %xmm2, %xmm0, %xmm3 +; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm3 ; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm5 ; AVX1-NEXT: vpand %xmm2, %xmm5, %xmm2 @@ -586,8 +586,8 @@ define <16 x i16> @testv16i16u(<16 x i16> %in) nounwind { ; AVX1-LABEL: testv16i16u: ; AVX1: # BB#0: ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX1-NEXT: vmovaps {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX1-NEXT: vandps %xmm2, %xmm1, %xmm3 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0] ; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3 ; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm5 @@ -602,7 +602,7 @@ define <16 x i16> @testv16i16u(<16 x i16> %in) nounwind { ; AVX1-NEXT: vpand %xmm1, %xmm3, %xmm1 ; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3 ; AVX1-NEXT: vpaddw %xmm1, %xmm3, %xmm1 -; AVX1-NEXT: vandps %xmm2, %xmm0, %xmm3 +; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm3 ; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm5 ; AVX1-NEXT: vpand %xmm2, %xmm5, %xmm2 @@ -673,8 +673,8 @@ define <32 x i8> @testv32i8(<32 x i8> %in) nounwind { ; AVX1-LABEL: testv32i8: ; AVX1: # BB#0: ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX1-NEXT: vmovaps {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX1-NEXT: vandps %xmm2, %xmm1, %xmm3 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0] ; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3 ; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1 @@ -684,7 +684,7 @@ define <32 x i8> @testv32i8(<32 x i8> %in) nounwind { ; AVX1-NEXT: vpand %xmm6, %xmm3, %xmm3 ; AVX1-NEXT: vpshufb %xmm1, %xmm4, %xmm1 ; AVX1-NEXT: vpaddb %xmm1, %xmm3, %xmm1 -; AVX1-NEXT: vandps %xmm2, %xmm0, %xmm3 +; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm3 ; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 ; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 @@ -747,8 +747,8 @@ define <32 x i8> @testv32i8u(<32 x i8> %in) nounwind { ; AVX1-LABEL: testv32i8u: ; AVX1: # BB#0: ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX1-NEXT: vmovaps {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX1-NEXT: vandps %xmm2, %xmm1, %xmm3 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0] ; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3 ; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1 @@ -758,7 +758,7 @@ define <32 x i8> @testv32i8u(<32 x i8> %in) nounwind { ; AVX1-NEXT: vpand %xmm6, %xmm3, %xmm3 ; AVX1-NEXT: vpshufb %xmm1, %xmm4, %xmm1 ; AVX1-NEXT: vpaddb %xmm1, %xmm3, %xmm1 -; AVX1-NEXT: vandps %xmm2, %xmm0, %xmm3 +; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm3 ; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 ; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 diff --git a/test/CodeGen/X86/vector-popcnt-256.ll b/test/CodeGen/X86/vector-popcnt-256.ll index 8bbfea93442..7a675619d72 100644 --- a/test/CodeGen/X86/vector-popcnt-256.ll +++ b/test/CodeGen/X86/vector-popcnt-256.ll @@ -6,8 +6,8 @@ define <4 x i64> @testv4i64(<4 x i64> %in) nounwind { ; AVX1-LABEL: testv4i64: ; AVX1: # BB#0: ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX1-NEXT: vmovaps {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX1-NEXT: vandps %xmm2, %xmm1, %xmm3 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] ; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3 ; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1 @@ -16,7 +16,7 @@ define <4 x i64> @testv4i64(<4 x i64> %in) nounwind { ; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; AVX1-NEXT: vpsadbw %xmm3, %xmm1, %xmm1 -; AVX1-NEXT: vandps %xmm2, %xmm0, 
%xmm5 +; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm5 ; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm5 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 ; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 @@ -47,8 +47,8 @@ define <8 x i32> @testv8i32(<8 x i32> %in) nounwind { ; AVX1-LABEL: testv8i32: ; AVX1: # BB#0: ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX1-NEXT: vmovaps {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX1-NEXT: vandps %xmm2, %xmm1, %xmm3 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] ; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3 ; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1 @@ -61,7 +61,7 @@ define <8 x i32> @testv8i32(<8 x i32> %in) nounwind { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero ; AVX1-NEXT: vpsadbw %xmm3, %xmm1, %xmm1 ; AVX1-NEXT: vpackuswb %xmm5, %xmm1, %xmm1 -; AVX1-NEXT: vandps %xmm2, %xmm0, %xmm5 +; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm5 ; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm5 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 ; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 @@ -145,15 +145,15 @@ define <32 x i8> @testv32i8(<32 x i8> %in) nounwind { ; AVX1-LABEL: testv32i8: ; AVX1: # BB#0: ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX1-NEXT: vmovaps {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX1-NEXT: vandps %xmm2, %xmm1, %xmm3 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] ; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3 ; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1 ; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vpshufb %xmm1, %xmm4, %xmm1 ; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1 -; AVX1-NEXT: vandps %xmm2, %xmm0, %xmm3 +; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm3 ; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 ; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 diff --git a/test/CodeGen/X86/vector-trunc-math.ll b/test/CodeGen/X86/vector-trunc-math.ll index 7383f2795a8..41a9aabd7a7 100644 --- a/test/CodeGen/X86/vector-trunc-math.ll +++ b/test/CodeGen/X86/vector-trunc-math.ll @@ -621,22 +621,22 @@ define <16 x i8> @trunc_add_const_v16i64_v16i8(<16 x i64> %a0) nounwind { ; AVX1-LABEL: trunc_add_const_v16i64_v16i8: ; AVX1: # BB#0: ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4 -; AVX1-NEXT: vmovaps {{.*#+}} xmm5 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0] -; AVX1-NEXT: vandps %xmm5, %xmm4, %xmm4 -; AVX1-NEXT: vandps %xmm5, %xmm3, %xmm3 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0] +; AVX1-NEXT: vpand %xmm5, %xmm4, %xmm4 +; AVX1-NEXT: vpand %xmm5, %xmm3, %xmm3 ; AVX1-NEXT: vpackuswb %xmm4, %xmm3, %xmm3 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4 -; AVX1-NEXT: vandps %xmm5, %xmm4, %xmm4 -; AVX1-NEXT: vandps %xmm5, %xmm2, %xmm2 +; AVX1-NEXT: vpand %xmm5, %xmm4, %xmm4 +; AVX1-NEXT: vpand %xmm5, %xmm2, %xmm2 ; AVX1-NEXT: vpackuswb %xmm4, %xmm2, %xmm2 ; AVX1-NEXT: vpackuswb %xmm3, %xmm2, %xmm2 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 -; AVX1-NEXT: vandps %xmm5, %xmm3, %xmm3 -; AVX1-NEXT: vandps %xmm5, %xmm1, %xmm1 +; AVX1-NEXT: vpand %xmm5, %xmm3, %xmm3 +; AVX1-NEXT: vpand %xmm5, %xmm1, %xmm1 ; AVX1-NEXT: vpackuswb %xmm3, %xmm1, %xmm1 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 -; AVX1-NEXT: vandps %xmm5, %xmm3, %xmm3 -; AVX1-NEXT: vandps %xmm5, %xmm0, %xmm0 +; AVX1-NEXT: vpand %xmm5, %xmm3, %xmm3 +; AVX1-NEXT: vpand %xmm5, %xmm0, %xmm0 ; 
AVX1-NEXT: vpackuswb %xmm3, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 @@ -717,13 +717,13 @@ define <16 x i8> @trunc_add_const_v16i32_v16i8(<16 x i32> %a0) nounwind { ; AVX1-LABEL: trunc_add_const_v16i32_v16i8: ; AVX1: # BB#0: ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 -; AVX1-NEXT: vmovaps {{.*#+}} xmm3 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] -; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vandps %xmm3, %xmm1, %xmm1 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] +; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2 +; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1 ; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 -; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vandps %xmm3, %xmm0, %xmm0 +; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2 +; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpaddb {{.*}}(%rip), %xmm0, %xmm0 @@ -3401,22 +3401,22 @@ define <16 x i8> @trunc_and_const_v16i64_v16i8(<16 x i64> %a0) nounwind { ; AVX1-LABEL: trunc_and_const_v16i64_v16i8: ; AVX1: # BB#0: ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4 -; AVX1-NEXT: vmovaps {{.*#+}} xmm5 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0] -; AVX1-NEXT: vandps %xmm5, %xmm4, %xmm4 -; AVX1-NEXT: vandps %xmm5, %xmm3, %xmm3 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0] +; AVX1-NEXT: vpand %xmm5, %xmm4, %xmm4 +; AVX1-NEXT: vpand %xmm5, %xmm3, %xmm3 ; AVX1-NEXT: vpackuswb %xmm4, %xmm3, %xmm3 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4 -; AVX1-NEXT: vandps %xmm5, %xmm4, %xmm4 -; AVX1-NEXT: vandps %xmm5, %xmm2, %xmm2 +; AVX1-NEXT: vpand %xmm5, %xmm4, %xmm4 +; AVX1-NEXT: vpand %xmm5, %xmm2, %xmm2 ; AVX1-NEXT: vpackuswb %xmm4, %xmm2, %xmm2 ; AVX1-NEXT: vpackuswb %xmm3, %xmm2, %xmm2 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 -; AVX1-NEXT: vandps %xmm5, %xmm3, %xmm3 -; AVX1-NEXT: vandps %xmm5, %xmm1, %xmm1 +; AVX1-NEXT: vpand %xmm5, %xmm3, %xmm3 +; AVX1-NEXT: vpand %xmm5, %xmm1, %xmm1 ; AVX1-NEXT: vpackuswb %xmm3, %xmm1, %xmm1 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 -; AVX1-NEXT: vandps %xmm5, %xmm3, %xmm3 -; AVX1-NEXT: vandps %xmm5, %xmm0, %xmm0 +; AVX1-NEXT: vpand %xmm5, %xmm3, %xmm3 +; AVX1-NEXT: vpand %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm3, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 @@ -3497,13 +3497,13 @@ define <16 x i8> @trunc_and_const_v16i32_v16i8(<16 x i32> %a0) nounwind { ; AVX1-LABEL: trunc_and_const_v16i32_v16i8: ; AVX1: # BB#0: ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 -; AVX1-NEXT: vmovaps {{.*#+}} xmm3 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] -; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vandps %xmm3, %xmm1, %xmm1 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] +; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2 +; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1 ; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 -; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vandps %xmm3, %xmm0, %xmm0 +; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2 +; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 @@ -4137,22 +4137,22 @@ define <16 x i8> @trunc_xor_const_v16i64_v16i8(<16 x i64> %a0) nounwind { ; AVX1-LABEL: trunc_xor_const_v16i64_v16i8: ; AVX1: # BB#0: ; AVX1-NEXT: vextractf128 $1, 
%ymm3, %xmm4
-; AVX1-NEXT: vmovaps {{.*#+}} xmm5 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
-; AVX1-NEXT: vandps %xmm5, %xmm4, %xmm4
-; AVX1-NEXT: vandps %xmm5, %xmm3, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
+; AVX1-NEXT: vpand %xmm5, %xmm4, %xmm4
+; AVX1-NEXT: vpand %xmm5, %xmm3, %xmm3
; AVX1-NEXT: vpackuswb %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
-; AVX1-NEXT: vandps %xmm5, %xmm4, %xmm4
-; AVX1-NEXT: vandps %xmm5, %xmm2, %xmm2
+; AVX1-NEXT: vpand %xmm5, %xmm4, %xmm4
+; AVX1-NEXT: vpand %xmm5, %xmm2, %xmm2
; AVX1-NEXT: vpackuswb %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpackuswb %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
-; AVX1-NEXT: vandps %xmm5, %xmm3, %xmm3
-; AVX1-NEXT: vandps %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm5, %xmm3, %xmm3
+; AVX1-NEXT: vpand %xmm5, %xmm1, %xmm1
; AVX1-NEXT: vpackuswb %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT: vandps %xmm5, %xmm3, %xmm3
-; AVX1-NEXT: vandps %xmm5, %xmm0, %xmm0
+; AVX1-NEXT: vpand %xmm5, %xmm3, %xmm3
+; AVX1-NEXT: vpand %xmm5, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
@@ -4233,13 +4233,13 @@ define <16 x i8> @trunc_xor_const_v16i32_v16i8(<16 x i32> %a0) nounwind {
; AVX1-LABEL: trunc_xor_const_v16i32_v16i8:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vmovaps {{.*#+}} xmm3 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
-; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vandps %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
+; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vandps %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
@@ -4873,22 +4873,22 @@ define <16 x i8> @trunc_or_const_v16i64_v16i8(<16 x i64> %a0) nounwind {
; AVX1-LABEL: trunc_or_const_v16i64_v16i8:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
-; AVX1-NEXT: vmovaps {{.*#+}} xmm5 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
-; AVX1-NEXT: vandps %xmm5, %xmm4, %xmm4
-; AVX1-NEXT: vandps %xmm5, %xmm3, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
+; AVX1-NEXT: vpand %xmm5, %xmm4, %xmm4
+; AVX1-NEXT: vpand %xmm5, %xmm3, %xmm3
; AVX1-NEXT: vpackuswb %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
-; AVX1-NEXT: vandps %xmm5, %xmm4, %xmm4
-; AVX1-NEXT: vandps %xmm5, %xmm2, %xmm2
+; AVX1-NEXT: vpand %xmm5, %xmm4, %xmm4
+; AVX1-NEXT: vpand %xmm5, %xmm2, %xmm2
; AVX1-NEXT: vpackuswb %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpackuswb %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
-; AVX1-NEXT: vandps %xmm5, %xmm3, %xmm3
-; AVX1-NEXT: vandps %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm5, %xmm3, %xmm3
+; AVX1-NEXT: vpand %xmm5, %xmm1, %xmm1
; AVX1-NEXT: vpackuswb %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT: vandps %xmm5, %xmm3, %xmm3
-; AVX1-NEXT: vandps %xmm5, %xmm0, %xmm0
+; AVX1-NEXT: vpand %xmm5, %xmm3, %xmm3
+; AVX1-NEXT: vpand %xmm5, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
@@ -4969,13 +4969,13 @@ define <16 x i8> @trunc_or_const_v16i32_v16i8(<16 x i32> %a0) nounwind {
; AVX1-LABEL: trunc_or_const_v16i32_v16i8:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vmovaps {{.*#+}} xmm3 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
-; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vandps %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
+; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vandps %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
diff --git a/test/CodeGen/X86/vector-trunc.ll b/test/CodeGen/X86/vector-trunc.ll
index 1c115f79156..701e0a247b9 100644
--- a/test/CodeGen/X86/vector-trunc.ll
+++ b/test/CodeGen/X86/vector-trunc.ll
@@ -144,13 +144,13 @@ define void @trunc8i64_8i8(<8 x i64> %a) {
; AVX1-LABEL: trunc8i64_8i8:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vmovaps {{.*#+}} xmm3 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
-; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vandps %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
+; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vandps %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
@@ -435,13 +435,13 @@ define void @trunc16i32_16i8(<16 x i32> %a) {
; AVX1-LABEL: trunc16i32_16i8:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vmovaps {{.*#+}} xmm3 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
-; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vandps %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
+; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vandps %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovdqu %xmm0, (%rax)
diff --git a/test/CodeGen/X86/viabs.ll b/test/CodeGen/X86/viabs.ll
index 61fb66bbcbb..da743bc6c4f 100644
--- a/test/CodeGen/X86/viabs.ll
+++ b/test/CodeGen/X86/viabs.ll
@@ -611,8 +611,8 @@ define <8 x i64> @test_abs_le_v8i64_fold(<8 x i64>* %a.ptr) nounwind {
;
; AVX1-LABEL: test_abs_le_v8i64_fold:
; AVX1: # BB#0:
-; AVX1-NEXT: vmovups (%rdi), %ymm0
-; AVX1-NEXT: vmovups 32(%rdi), %ymm1
+; AVX1-NEXT: vmovdqu (%rdi), %ymm0
+; AVX1-NEXT: vmovdqu 32(%rdi), %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpsrad $31, %xmm2, %xmm3
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
diff --git a/test/CodeGen/X86/vselect-minmax.ll b/test/CodeGen/X86/vselect-minmax.ll
index afe5f661559..5524eaf397c 100644
--- a/test/CodeGen/X86/vselect-minmax.ll
+++ b/test/CodeGen/X86/vselect-minmax.ll
@@ -5418,22 +5418,22 @@ define <8 x i64> @test125(<8 x i64> %a, <8 x i64> %b) {
; AVX1-LABEL: test125:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
-; AVX1-NEXT: vmovaps {{.*#+}} xmm5 = [9223372036854775808,9223372036854775808]
-; AVX1-NEXT: vxorps %xmm5, %xmm4, %xmm4
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT: vpxor %xmm5, %xmm4, %xmm4
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm6
-; AVX1-NEXT: vxorps %xmm5, %xmm6, %xmm6
+; AVX1-NEXT: vpxor %xmm5, %xmm6, %xmm6
; AVX1-NEXT: vpcmpgtq %xmm4, %xmm6, %xmm4
-; AVX1-NEXT: vxorps %xmm5, %xmm1, %xmm6
-; AVX1-NEXT: vxorps %xmm5, %xmm3, %xmm7
+; AVX1-NEXT: vpxor %xmm5, %xmm1, %xmm6
+; AVX1-NEXT: vpxor %xmm5, %xmm3, %xmm7
; AVX1-NEXT: vpcmpgtq %xmm6, %xmm7, %xmm6
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm6, %ymm4
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm6
-; AVX1-NEXT: vxorps %xmm5, %xmm6, %xmm6
+; AVX1-NEXT: vpxor %xmm5, %xmm6, %xmm6
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm7
-; AVX1-NEXT: vxorps %xmm5, %xmm7, %xmm7
+; AVX1-NEXT: vpxor %xmm5, %xmm7, %xmm7
; AVX1-NEXT: vpcmpgtq %xmm6, %xmm7, %xmm6
-; AVX1-NEXT: vxorps %xmm5, %xmm0, %xmm7
-; AVX1-NEXT: vxorps %xmm5, %xmm2, %xmm5
+; AVX1-NEXT: vpxor %xmm5, %xmm0, %xmm7
+; AVX1-NEXT: vpxor %xmm5, %xmm2, %xmm5
; AVX1-NEXT: vpcmpgtq %xmm7, %xmm5, %xmm5
; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm5, %ymm5
; AVX1-NEXT: vblendvpd %ymm5, %ymm0, %ymm2, %ymm0
@@ -5589,26 +5589,26 @@ define <8 x i64> @test126(<8 x i64> %a, <8 x i64> %b) {
; AVX1-LABEL: test126:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
-; AVX1-NEXT: vmovaps {{.*#+}} xmm5 = [9223372036854775808,9223372036854775808]
-; AVX1-NEXT: vxorps %xmm5, %xmm4, %xmm4
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT: vpxor %xmm5, %xmm4, %xmm4
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm6
-; AVX1-NEXT: vxorps %xmm5, %xmm6, %xmm6
+; AVX1-NEXT: vpxor %xmm5, %xmm6, %xmm6
; AVX1-NEXT: vpcmpgtq %xmm4, %xmm6, %xmm4
; AVX1-NEXT: vpcmpeqd %xmm8, %xmm8, %xmm8
; AVX1-NEXT: vpxor %xmm8, %xmm4, %xmm4
-; AVX1-NEXT: vxorps %xmm5, %xmm3, %xmm7
-; AVX1-NEXT: vxorps %xmm5, %xmm1, %xmm6
+; AVX1-NEXT: vpxor %xmm5, %xmm3, %xmm7
+; AVX1-NEXT: vpxor %xmm5, %xmm1, %xmm6
; AVX1-NEXT: vpcmpgtq %xmm7, %xmm6, %xmm6
; AVX1-NEXT: vpxor %xmm8, %xmm6, %xmm6
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm6, %ymm4
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm6
-; AVX1-NEXT: vxorps %xmm5, %xmm6, %xmm6
+; AVX1-NEXT: vpxor %xmm5, %xmm6, %xmm6
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm7
-; AVX1-NEXT: vxorps %xmm5, %xmm7, %xmm7
+; AVX1-NEXT: vpxor %xmm5, %xmm7, %xmm7
; AVX1-NEXT: vpcmpgtq %xmm6, %xmm7, %xmm6
; AVX1-NEXT: vpxor %xmm8, %xmm6, %xmm6
-; AVX1-NEXT: vxorps %xmm5, %xmm2, %xmm7
-; AVX1-NEXT: vxorps %xmm5, %xmm0, %xmm5
+; AVX1-NEXT: vpxor %xmm5, %xmm2, %xmm7
+; AVX1-NEXT: vpxor %xmm5, %xmm0, %xmm5
; AVX1-NEXT: vpcmpgtq %xmm7, %xmm5, %xmm5
; AVX1-NEXT: vpxor %xmm8, %xmm5, %xmm5
; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm5, %ymm5
@@ -5746,22 +5746,22 @@ define <8 x i64> @test127(<8 x i64> %a, <8 x i64> %b) {
; AVX1-LABEL: test127:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
-; AVX1-NEXT: vmovaps {{.*#+}} xmm5 = [9223372036854775808,9223372036854775808]
-; AVX1-NEXT: vxorps %xmm5, %xmm4, %xmm4
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT: vpxor %xmm5, %xmm4, %xmm4
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm6
-; AVX1-NEXT: vxorps %xmm5, %xmm6, %xmm6
+; AVX1-NEXT: vpxor %xmm5, %xmm6, %xmm6
; AVX1-NEXT: vpcmpgtq %xmm4, %xmm6, %xmm4
-; AVX1-NEXT: vxorps %xmm5, %xmm3, %xmm6
-; AVX1-NEXT: vxorps %xmm5, %xmm1, %xmm7
+; AVX1-NEXT: vpxor %xmm5, %xmm3, %xmm6
+; AVX1-NEXT: vpxor %xmm5, %xmm1, %xmm7
; AVX1-NEXT: vpcmpgtq %xmm6, %xmm7, %xmm6
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm6, %ymm4
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm6
-; AVX1-NEXT: vxorps %xmm5, %xmm6, %xmm6
+; AVX1-NEXT: vpxor %xmm5, %xmm6, %xmm6
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm7
-; AVX1-NEXT: vxorps %xmm5, %xmm7, %xmm7
+; AVX1-NEXT: vpxor %xmm5, %xmm7, %xmm7
; AVX1-NEXT: vpcmpgtq %xmm6, %xmm7, %xmm6
-; AVX1-NEXT: vxorps %xmm5, %xmm2, %xmm7
-; AVX1-NEXT: vxorps %xmm5, %xmm0, %xmm5
+; AVX1-NEXT: vpxor %xmm5, %xmm2, %xmm7
+; AVX1-NEXT: vpxor %xmm5, %xmm0, %xmm5
; AVX1-NEXT: vpcmpgtq %xmm7, %xmm5, %xmm5
; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm5, %ymm5
; AVX1-NEXT: vblendvpd %ymm5, %ymm0, %ymm2, %ymm0
@@ -5918,26 +5918,26 @@ define <8 x i64> @test128(<8 x i64> %a, <8 x i64> %b) {
; AVX1-LABEL: test128:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
-; AVX1-NEXT: vmovaps {{.*#+}} xmm5 = [9223372036854775808,9223372036854775808]
-; AVX1-NEXT: vxorps %xmm5, %xmm4, %xmm4
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT: vpxor %xmm5, %xmm4, %xmm4
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm6
-; AVX1-NEXT: vxorps %xmm5, %xmm6, %xmm6
+; AVX1-NEXT: vpxor %xmm5, %xmm6, %xmm6
; AVX1-NEXT: vpcmpgtq %xmm4, %xmm6, %xmm4
; AVX1-NEXT: vpcmpeqd %xmm8, %xmm8, %xmm8
; AVX1-NEXT: vpxor %xmm8, %xmm4, %xmm4
-; AVX1-NEXT: vxorps %xmm5, %xmm1, %xmm7
-; AVX1-NEXT: vxorps %xmm5, %xmm3, %xmm6
+; AVX1-NEXT: vpxor %xmm5, %xmm1, %xmm7
+; AVX1-NEXT: vpxor %xmm5, %xmm3, %xmm6
; AVX1-NEXT: vpcmpgtq %xmm7, %xmm6, %xmm6
; AVX1-NEXT: vpxor %xmm8, %xmm6, %xmm6
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm6, %ymm4
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm6
-; AVX1-NEXT: vxorps %xmm5, %xmm6, %xmm6
+; AVX1-NEXT: vpxor %xmm5, %xmm6, %xmm6
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm7
-; AVX1-NEXT: vxorps %xmm5, %xmm7, %xmm7
+; AVX1-NEXT: vpxor %xmm5, %xmm7, %xmm7
; AVX1-NEXT: vpcmpgtq %xmm6, %xmm7, %xmm6
; AVX1-NEXT: vpxor %xmm8, %xmm6, %xmm6
-; AVX1-NEXT: vxorps %xmm5, %xmm0, %xmm7
-; AVX1-NEXT: vxorps %xmm5, %xmm2, %xmm5
+; AVX1-NEXT: vpxor %xmm5, %xmm0, %xmm7
+; AVX1-NEXT: vpxor %xmm5, %xmm2, %xmm5
; AVX1-NEXT: vpcmpgtq %xmm7, %xmm5, %xmm5
; AVX1-NEXT: vpxor %xmm8, %xmm5, %xmm5
; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm5, %ymm5
@@ -8134,22 +8134,22 @@ define <8 x i64> @test157(<8 x i64> %a, <8 x i64> %b) {
; AVX1-LABEL: test157:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
-; AVX1-NEXT: vmovaps {{.*#+}} xmm5 = [9223372036854775808,9223372036854775808]
-; AVX1-NEXT: vxorps %xmm5, %xmm4, %xmm4
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT: vpxor %xmm5, %xmm4, %xmm4
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm6
-; AVX1-NEXT: vxorps %xmm5, %xmm6, %xmm6
+; AVX1-NEXT: vpxor %xmm5, %xmm6, %xmm6
; AVX1-NEXT: vpcmpgtq %xmm4, %xmm6, %xmm4
-; AVX1-NEXT: vxorps %xmm5, %xmm1, %xmm6
-; AVX1-NEXT: vxorps %xmm5, %xmm3, %xmm7
+; AVX1-NEXT: vpxor %xmm5, %xmm1, %xmm6
+; AVX1-NEXT: vpxor %xmm5, %xmm3, %xmm7
; AVX1-NEXT: vpcmpgtq %xmm6, %xmm7, %xmm6
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm6, %ymm4
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm6
-; AVX1-NEXT: vxorps %xmm5, %xmm6, %xmm6
+; AVX1-NEXT: vpxor %xmm5, %xmm6, %xmm6
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm7
-; AVX1-NEXT: vxorps %xmm5, %xmm7, %xmm7
+; AVX1-NEXT: vpxor %xmm5, %xmm7, %xmm7
; AVX1-NEXT: vpcmpgtq %xmm6, %xmm7, %xmm6
-; AVX1-NEXT: vxorps %xmm5, %xmm0, %xmm7
-; AVX1-NEXT: vxorps %xmm5, %xmm2, %xmm5
+; AVX1-NEXT: vpxor %xmm5, %xmm0, %xmm7
+; AVX1-NEXT: vpxor %xmm5, %xmm2, %xmm5
; AVX1-NEXT: vpcmpgtq %xmm7, %xmm5, %xmm5
; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm5, %ymm5
; AVX1-NEXT: vblendvpd %ymm5, %ymm2, %ymm0, %ymm0
@@ -8302,26 +8302,26 @@ define <8 x i64> @test158(<8 x i64> %a, <8 x i64> %b) {
; AVX1-LABEL: test158:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
-; AVX1-NEXT: vmovaps {{.*#+}} xmm5 = [9223372036854775808,9223372036854775808]
-; AVX1-NEXT: vxorps %xmm5, %xmm4, %xmm4
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT: vpxor %xmm5, %xmm4, %xmm4
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm6
-; AVX1-NEXT: vxorps %xmm5, %xmm6, %xmm6
+; AVX1-NEXT: vpxor %xmm5, %xmm6, %xmm6
; AVX1-NEXT: vpcmpgtq %xmm4, %xmm6, %xmm4
; AVX1-NEXT: vpcmpeqd %xmm8, %xmm8, %xmm8
; AVX1-NEXT: vpxor %xmm8, %xmm4, %xmm4
-; AVX1-NEXT: vxorps %xmm5, %xmm3, %xmm7
-; AVX1-NEXT: vxorps %xmm5, %xmm1, %xmm6
+; AVX1-NEXT: vpxor %xmm5, %xmm3, %xmm7
+; AVX1-NEXT: vpxor %xmm5, %xmm1, %xmm6
; AVX1-NEXT: vpcmpgtq %xmm7, %xmm6, %xmm6
; AVX1-NEXT: vpxor %xmm8, %xmm6, %xmm6
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm6, %ymm4
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm6
-; AVX1-NEXT: vxorps %xmm5, %xmm6, %xmm6
+; AVX1-NEXT: vpxor %xmm5, %xmm6, %xmm6
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm7
-; AVX1-NEXT: vxorps %xmm5, %xmm7, %xmm7
+; AVX1-NEXT: vpxor %xmm5, %xmm7, %xmm7
; AVX1-NEXT: vpcmpgtq %xmm6, %xmm7, %xmm6
; AVX1-NEXT: vpxor %xmm8, %xmm6, %xmm6
-; AVX1-NEXT: vxorps %xmm5, %xmm2, %xmm7
-; AVX1-NEXT: vxorps %xmm5, %xmm0, %xmm5
+; AVX1-NEXT: vpxor %xmm5, %xmm2, %xmm7
+; AVX1-NEXT: vpxor %xmm5, %xmm0, %xmm5
; AVX1-NEXT: vpcmpgtq %xmm7, %xmm5, %xmm5
; AVX1-NEXT: vpxor %xmm8, %xmm5, %xmm5
; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm5, %ymm5
@@ -8460,22 +8460,22 @@ define <8 x i64> @test159(<8 x i64> %a, <8 x i64> %b) {
; AVX1-LABEL: test159:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
-; AVX1-NEXT: vmovaps {{.*#+}} xmm5 = [9223372036854775808,9223372036854775808]
-; AVX1-NEXT: vxorps %xmm5, %xmm4, %xmm4
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT: vpxor %xmm5, %xmm4, %xmm4
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm6
-; AVX1-NEXT: vxorps %xmm5, %xmm6, %xmm6
+; AVX1-NEXT: vpxor %xmm5, %xmm6, %xmm6
; AVX1-NEXT: vpcmpgtq %xmm4, %xmm6, %xmm4
-; AVX1-NEXT: vxorps %xmm5, %xmm3, %xmm6
-; AVX1-NEXT: vxorps %xmm5, %xmm1, %xmm7
+; AVX1-NEXT: vpxor %xmm5, %xmm3, %xmm6
+; AVX1-NEXT: vpxor %xmm5, %xmm1, %xmm7
; AVX1-NEXT: vpcmpgtq %xmm6, %xmm7, %xmm6
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm6, %ymm4
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm6
-; AVX1-NEXT: vxorps %xmm5, %xmm6, %xmm6
+; AVX1-NEXT: vpxor %xmm5, %xmm6, %xmm6
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm7
-; AVX1-NEXT: vxorps %xmm5, %xmm7, %xmm7
+; AVX1-NEXT: vpxor %xmm5, %xmm7, %xmm7
; AVX1-NEXT: vpcmpgtq %xmm6, %xmm7, %xmm6
-; AVX1-NEXT: vxorps %xmm5, %xmm2, %xmm7
-; AVX1-NEXT: vxorps %xmm5, %xmm0, %xmm5
+; AVX1-NEXT: vpxor %xmm5, %xmm2, %xmm7
+; AVX1-NEXT: vpxor %xmm5, %xmm0, %xmm5
; AVX1-NEXT: vpcmpgtq %xmm7, %xmm5, %xmm5
; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm5, %ymm5
; AVX1-NEXT: vblendvpd %ymm5, %ymm2, %ymm0, %ymm0
@@ -8629,26 +8629,26 @@ define <8 x i64> @test160(<8 x i64> %a, <8 x i64> %b) {
; AVX1-LABEL: test160:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
-; AVX1-NEXT: vmovaps {{.*#+}} xmm5 = [9223372036854775808,9223372036854775808]
-; AVX1-NEXT: vxorps %xmm5, %xmm4, %xmm4
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT: vpxor %xmm5, %xmm4, %xmm4
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm6
-; AVX1-NEXT: vxorps %xmm5, %xmm6, %xmm6
+; AVX1-NEXT: vpxor %xmm5, %xmm6, %xmm6
; AVX1-NEXT: vpcmpgtq %xmm4, %xmm6, %xmm4
; AVX1-NEXT: vpcmpeqd %xmm8, %xmm8, %xmm8
; AVX1-NEXT: vpxor %xmm8, %xmm4, %xmm4
-; AVX1-NEXT: vxorps %xmm5, %xmm1, %xmm7
-; AVX1-NEXT: vxorps %xmm5, %xmm3, %xmm6
+; AVX1-NEXT: vpxor %xmm5, %xmm1, %xmm7
+; AVX1-NEXT: vpxor %xmm5, %xmm3, %xmm6
; AVX1-NEXT: vpcmpgtq %xmm7, %xmm6, %xmm6
; AVX1-NEXT: vpxor %xmm8, %xmm6, %xmm6
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm6, %ymm4
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm6
-; AVX1-NEXT: vxorps %xmm5, %xmm6, %xmm6
+; AVX1-NEXT: vpxor %xmm5, %xmm6, %xmm6
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm7
-; AVX1-NEXT: vxorps %xmm5, %xmm7, %xmm7
+; AVX1-NEXT: vpxor %xmm5, %xmm7, %xmm7
; AVX1-NEXT: vpcmpgtq %xmm6, %xmm7, %xmm6
; AVX1-NEXT: vpxor %xmm8, %xmm6, %xmm6
-; AVX1-NEXT: vxorps %xmm5, %xmm0, %xmm7
-; AVX1-NEXT: vxorps %xmm5, %xmm2, %xmm5
+; AVX1-NEXT: vpxor %xmm5, %xmm0, %xmm7
+; AVX1-NEXT: vpxor %xmm5, %xmm2, %xmm5
; AVX1-NEXT: vpcmpgtq %xmm7, %xmm5, %xmm5
; AVX1-NEXT: vpxor %xmm8, %xmm5, %xmm5
; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm5, %ymm5
@@ -9066,13 +9066,13 @@ define <4 x i64> @test165(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: test165:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vmovaps {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
-; AVX1-NEXT: vxorps %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
-; AVX1-NEXT: vxorps %xmm3, %xmm4, %xmm4
+; AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm4
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vxorps %xmm3, %xmm0, %xmm4
-; AVX1-NEXT: vxorps %xmm3, %xmm1, %xmm3
+; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm4
+; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
@@ -9164,15 +9164,15 @@ define <4 x i64> @test166(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: test166:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vmovaps {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
-; AVX1-NEXT: vxorps %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
-; AVX1-NEXT: vxorps %xmm3, %xmm4, %xmm4
+; AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm4
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vxorps %xmm3, %xmm1, %xmm5
-; AVX1-NEXT: vxorps %xmm3, %xmm0, %xmm3
+; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm5
+; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm5, %xmm3, %xmm3
; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
@@ -9258,13 +9258,13 @@ define <4 x i64> @test167(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: test167:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vmovaps {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
-; AVX1-NEXT: vxorps %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
-; AVX1-NEXT: vxorps %xmm3, %xmm4, %xmm4
+; AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm4
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vxorps %xmm3, %xmm1, %xmm4
-; AVX1-NEXT: vxorps %xmm3, %xmm0, %xmm3
+; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm4
+; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
@@ -9356,15 +9356,15 @@ define <4 x i64> @test168(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: test168:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vmovaps {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
-; AVX1-NEXT: vxorps %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
-; AVX1-NEXT: vxorps %xmm3, %xmm4, %xmm4
+; AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm4
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vxorps %xmm3, %xmm0, %xmm5
-; AVX1-NEXT: vxorps %xmm3, %xmm1, %xmm3
+; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm5
+; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm5, %xmm3, %xmm3
; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
@@ -9777,13 +9777,13 @@ define <4 x i64> @test173(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: test173:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vmovaps {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
-; AVX1-NEXT: vxorps %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
-; AVX1-NEXT: vxorps %xmm3, %xmm4, %xmm4
+; AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm4
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vxorps %xmm3, %xmm0, %xmm4
-; AVX1-NEXT: vxorps %xmm3, %xmm1, %xmm3
+; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm4
+; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0
@@ -9874,15 +9874,15 @@ define <4 x i64> @test174(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: test174:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vmovaps {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
-; AVX1-NEXT: vxorps %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
-; AVX1-NEXT: vxorps %xmm3, %xmm4, %xmm4
+; AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm4
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vxorps %xmm3, %xmm1, %xmm5
-; AVX1-NEXT: vxorps %xmm3, %xmm0, %xmm3
+; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm5
+; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm5, %xmm3, %xmm3
; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
@@ -9969,13 +9969,13 @@ define <4 x i64> @test175(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: test175:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vmovaps {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
-; AVX1-NEXT: vxorps %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
-; AVX1-NEXT: vxorps %xmm3, %xmm4, %xmm4
+; AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm4
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vxorps %xmm3, %xmm1, %xmm4
-; AVX1-NEXT: vxorps %xmm3, %xmm0, %xmm3
+; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm4
+; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0
@@ -10066,15 +10066,15 @@ define <4 x i64> @test176(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: test176:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vmovaps {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
-; AVX1-NEXT: vxorps %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
-; AVX1-NEXT: vxorps %xmm3, %xmm4, %xmm4
+; AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm4
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vxorps %xmm3, %xmm0, %xmm5
-; AVX1-NEXT: vxorps %xmm3, %xmm1, %xmm3
+; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm5
+; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm5, %xmm3, %xmm3
; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
diff --git a/test/CodeGen/X86/x86-interleaved-access.ll b/test/CodeGen/X86/x86-interleaved-access.ll
index 1fc1b43b040..6fbec91e77a 100644
--- a/test/CodeGen/X86/x86-interleaved-access.ll
+++ b/test/CodeGen/X86/x86-interleaved-access.ll
@@ -53,17 +53,29 @@ define <4 x double> @load_factorf64_2(<16 x double>* %ptr) {
}
define <4 x double> @load_factorf64_1(<16 x double>* %ptr) {
-; AVX-LABEL: load_factorf64_1:
-; AVX: # BB#0:
-; AVX-NEXT: vmovupd (%rdi), %ymm0
-; AVX-NEXT: vmovupd 32(%rdi), %ymm1
-; AVX-NEXT: vmovupd 64(%rdi), %ymm2
-; AVX-NEXT: vmovupd 96(%rdi), %ymm3
-; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
-; AVX-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
-; AVX-NEXT: vmulpd %ymm0, %ymm0, %ymm0
-; AVX-NEXT: retq
+; AVX1-LABEL: load_factorf64_1:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovups (%rdi), %ymm0
+; AVX1-NEXT: vmovups 32(%rdi), %ymm1
+; AVX1-NEXT: vmovups 64(%rdi), %ymm2
+; AVX1-NEXT: vmovups 96(%rdi), %ymm3
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
+; AVX1-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
+; AVX1-NEXT: vmulpd %ymm0, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: load_factorf64_1:
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovupd (%rdi), %ymm0
+; AVX2-NEXT: vmovupd 32(%rdi), %ymm1
+; AVX2-NEXT: vmovupd 64(%rdi), %ymm2
+; AVX2-NEXT: vmovupd 96(%rdi), %ymm3
+; AVX2-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX2-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
+; AVX2-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
+; AVX2-NEXT: vmulpd %ymm0, %ymm0, %ymm0
+; AVX2-NEXT: retq
%wide.vec = load <16 x double>, <16 x double>* %ptr, align 16
%strided.v0 = shufflevector <16 x double> %wide.vec, <16 x double> undef, <4 x i32>
%strided.v3 = shufflevector <16 x double> %wide.vec, <16 x double> undef, <4 x i32>