From: Simon Pilgrim Date: Sat, 21 Oct 2017 20:19:48 +0000 (+0000) Subject: [X86][SSE] Add extractps/pextrd equivalence to domain tables X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=8d6bf15df655f7c5fc3b857e4c0920da1b24675f;p=llvm [X86][SSE] Add extractps/pextrd equivalence to domain tables Differential Revision: https://reviews.llvm.org/D39135 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@316274 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index ae4d123e4cf..9a64c357f02 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -9451,6 +9451,8 @@ static const uint16_t ReplaceableInstrs[][3] = { { X86::UNPCKLPSrr, X86::UNPCKLPSrr, X86::PUNPCKLDQrr }, { X86::UNPCKHPSrm, X86::UNPCKHPSrm, X86::PUNPCKHDQrm }, { X86::UNPCKHPSrr, X86::UNPCKHPSrr, X86::PUNPCKHDQrr }, + { X86::EXTRACTPSmr, X86::EXTRACTPSmr, X86::PEXTRDmr }, + { X86::EXTRACTPSrr, X86::EXTRACTPSrr, X86::PEXTRDrr }, // AVX 128-bit support { X86::VMOVAPSmr, X86::VMOVAPDmr, X86::VMOVDQAmr }, { X86::VMOVAPSrm, X86::VMOVAPDrm, X86::VMOVDQArm }, @@ -9479,6 +9481,8 @@ static const uint16_t ReplaceableInstrs[][3] = { { X86::VUNPCKLPSrr, X86::VUNPCKLPSrr, X86::VPUNPCKLDQrr }, { X86::VUNPCKHPSrm, X86::VUNPCKHPSrm, X86::VPUNPCKHDQrm }, { X86::VUNPCKHPSrr, X86::VUNPCKHPSrr, X86::VPUNPCKHDQrr }, + { X86::VEXTRACTPSmr, X86::VEXTRACTPSmr, X86::VPEXTRDmr }, + { X86::VEXTRACTPSrr, X86::VEXTRACTPSrr, X86::VPEXTRDrr }, // AVX 256-bit support { X86::VMOVAPSYmr, X86::VMOVAPDYmr, X86::VMOVDQAYmr }, { X86::VMOVAPSYrm, X86::VMOVAPDYrm, X86::VMOVDQAYrm }, @@ -9577,6 +9581,8 @@ static const uint16_t ReplaceableInstrs[][3] = { { X86::VUNPCKLPSZrr, X86::VUNPCKLPSZrr, X86::VPUNPCKLDQZrr }, { X86::VUNPCKHPSZrm, X86::VUNPCKHPSZrm, X86::VPUNPCKHDQZrm }, { X86::VUNPCKHPSZrr, X86::VUNPCKHPSZrr, X86::VPUNPCKHDQZrr }, + { X86::VEXTRACTPSZmr, X86::VEXTRACTPSZmr, X86::VPEXTRDZmr }, + { X86::VEXTRACTPSZrr, 
X86::VEXTRACTPSZrr, X86::VPEXTRDZrr }, }; static const uint16_t ReplaceableInstrsAVX2[][3] = { diff --git a/test/CodeGen/X86/2011-10-19-widen_vselect.ll b/test/CodeGen/X86/2011-10-19-widen_vselect.ll index 997d2fd1ae7..416761ffef4 100644 --- a/test/CodeGen/X86/2011-10-19-widen_vselect.ll +++ b/test/CodeGen/X86/2011-10-19-widen_vselect.ll @@ -49,9 +49,9 @@ entry: define void @zero_test() { ; X32-LABEL: zero_test: ; X32: # BB#0: # %entry -; X32-NEXT: pxor %xmm0, %xmm0 -; X32-NEXT: pextrd $1, %xmm0, (%eax) -; X32-NEXT: movd %xmm0, (%eax) +; X32-NEXT: xorps %xmm0, %xmm0 +; X32-NEXT: extractps $1, %xmm0, (%eax) +; X32-NEXT: movss %xmm0, (%eax) ; X32-NEXT: retl ; ; X64-LABEL: zero_test: diff --git a/test/CodeGen/X86/2011-12-26-extractelement-duplicate-load.ll b/test/CodeGen/X86/2011-12-26-extractelement-duplicate-load.ll index 3439ebcf9de..c87b04485e4 100644 --- a/test/CodeGen/X86/2011-12-26-extractelement-duplicate-load.ll +++ b/test/CodeGen/X86/2011-12-26-extractelement-duplicate-load.ll @@ -10,12 +10,12 @@ define <4 x i32> @test(<4 x i32>* %p) { ; CHECK-LABEL: test: ; CHECK: # BB#0: -; CHECK-NEXT: movdqa (%rdi), %xmm0 -; CHECK-NEXT: pextrd $2, %xmm0, %eax +; CHECK-NEXT: movaps (%rdi), %xmm0 +; CHECK-NEXT: extractps $2, %xmm0, %eax ; CHECK-NEXT: cmpl $3, %eax ; CHECK-NEXT: je .LBB0_2 ; CHECK-NEXT: # BB#1: -; CHECK-NEXT: pxor %xmm0, %xmm0 +; CHECK-NEXT: xorps %xmm0, %xmm0 ; CHECK-NEXT: .LBB0_2: ; CHECK-NEXT: retq %v = load <4 x i32>, <4 x i32>* %p diff --git a/test/CodeGen/X86/avx-intrinsics-fast-isel.ll b/test/CodeGen/X86/avx-intrinsics-fast-isel.ll index e50f9396217..1ae93dc747f 100644 --- a/test/CodeGen/X86/avx-intrinsics-fast-isel.ll +++ b/test/CodeGen/X86/avx-intrinsics-fast-isel.ll @@ -792,14 +792,14 @@ define i32 @test_mm256_extract_epi32(<4 x i64> %a0) nounwind { ; X32-LABEL: test_mm256_extract_epi32: ; X32: # BB#0: ; X32-NEXT: vextractf128 $1, %ymm0, %xmm0 -; X32-NEXT: vpextrd $1, %xmm0, %eax +; X32-NEXT: vextractps $1, %xmm0, %eax ; X32-NEXT: vzeroupper ; 
X32-NEXT: retl ; ; X64-LABEL: test_mm256_extract_epi32: ; X64: # BB#0: ; X64-NEXT: vextractf128 $1, %ymm0, %xmm0 -; X64-NEXT: vpextrd $1, %xmm0, %eax +; X64-NEXT: vextractps $1, %xmm0, %eax ; X64-NEXT: vzeroupper ; X64-NEXT: retq %arg0 = bitcast <4 x i64> %a0 to <8 x i32> @@ -811,8 +811,8 @@ define i64 @test_mm256_extract_epi64(<4 x i64> %a0) nounwind { ; X32-LABEL: test_mm256_extract_epi64: ; X32: # BB#0: ; X32-NEXT: vextractf128 $1, %ymm0, %xmm0 -; X32-NEXT: vpextrd $2, %xmm0, %eax -; X32-NEXT: vpextrd $3, %xmm0, %edx +; X32-NEXT: vextractps $2, %xmm0, %eax +; X32-NEXT: vextractps $3, %xmm0, %edx ; X32-NEXT: vzeroupper ; X32-NEXT: retl ; diff --git a/test/CodeGen/X86/avx512-insert-extract.ll b/test/CodeGen/X86/avx512-insert-extract.ll index 7db93c9e2db..bff09d6b000 100644 --- a/test/CodeGen/X86/avx512-insert-extract.ll +++ b/test/CodeGen/X86/avx512-insert-extract.ll @@ -424,9 +424,9 @@ define i64 @extract_v2i64(<2 x i64> %x, i64* %dst) { define i32 @extract_v16i32(<16 x i32> %x, i32* %dst) { ; CHECK-LABEL: extract_v16i32: ; CHECK: ## BB#0: -; CHECK-NEXT: vpextrd $1, %xmm0, %eax -; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm0 -; CHECK-NEXT: vpextrd $1, %xmm0, (%rdi) +; CHECK-NEXT: vextractps $1, %xmm0, %eax +; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0 +; CHECK-NEXT: vextractps $1, %xmm0, (%rdi) ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %r1 = extractelement <16 x i32> %x, i32 1 @@ -438,9 +438,9 @@ define i32 @extract_v16i32(<16 x i32> %x, i32* %dst) { define i32 @extract_v8i32(<8 x i32> %x, i32* %dst) { ; CHECK-LABEL: extract_v8i32: ; CHECK: ## BB#0: -; CHECK-NEXT: vpextrd $1, %xmm0, %eax -; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm0 -; CHECK-NEXT: vpextrd $1, %xmm0, (%rdi) +; CHECK-NEXT: vextractps $1, %xmm0, %eax +; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0 +; CHECK-NEXT: vextractps $1, %xmm0, (%rdi) ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %r1 = extractelement <8 x i32> %x, i32 1 @@ -452,8 +452,8 @@ define i32 @extract_v8i32(<8 x i32> %x, i32* %dst) { define 
i32 @extract_v4i32(<4 x i32> %x, i32* %dst) { ; CHECK-LABEL: extract_v4i32: ; CHECK: ## BB#0: -; CHECK-NEXT: vpextrd $1, %xmm0, %eax -; CHECK-NEXT: vpextrd $3, %xmm0, (%rdi) +; CHECK-NEXT: vextractps $1, %xmm0, %eax +; CHECK-NEXT: vextractps $3, %xmm0, (%rdi) ; CHECK-NEXT: retq %r1 = extractelement <4 x i32> %x, i32 1 %r2 = extractelement <4 x i32> %x, i32 3 diff --git a/test/CodeGen/X86/extract-store.ll b/test/CodeGen/X86/extract-store.ll index e39f3f170a2..225d2e9a107 100644 --- a/test/CodeGen/X86/extract-store.ll +++ b/test/CodeGen/X86/extract-store.ll @@ -285,23 +285,23 @@ define void @extract_i32_3(i32* nocapture %dst, <4 x i32> %foo) nounwind { ; SSE41-X32-LABEL: extract_i32_3: ; SSE41-X32: # BB#0: ; SSE41-X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; SSE41-X32-NEXT: pextrd $3, %xmm0, (%eax) +; SSE41-X32-NEXT: extractps $3, %xmm0, (%eax) ; SSE41-X32-NEXT: retl ; ; SSE41-X64-LABEL: extract_i32_3: ; SSE41-X64: # BB#0: -; SSE41-X64-NEXT: pextrd $3, %xmm0, (%rdi) +; SSE41-X64-NEXT: extractps $3, %xmm0, (%rdi) ; SSE41-X64-NEXT: retq ; ; AVX-X32-LABEL: extract_i32_3: ; AVX-X32: # BB#0: ; AVX-X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; AVX-X32-NEXT: vpextrd $3, %xmm0, (%eax) +; AVX-X32-NEXT: vextractps $3, %xmm0, (%eax) ; AVX-X32-NEXT: retl ; ; AVX-X64-LABEL: extract_i32_3: ; AVX-X64: # BB#0: -; AVX-X64-NEXT: vpextrd $3, %xmm0, (%rdi) +; AVX-X64-NEXT: vextractps $3, %xmm0, (%rdi) ; AVX-X64-NEXT: retq ; ; SSE-F128-LABEL: extract_i32_3: diff --git a/test/CodeGen/X86/extractelement-index.ll b/test/CodeGen/X86/extractelement-index.ll index 228ce70b400..8a6cdaf203c 100644 --- a/test/CodeGen/X86/extractelement-index.ll +++ b/test/CodeGen/X86/extractelement-index.ll @@ -231,12 +231,12 @@ define i32 @extractelement_v4i32_3(<4 x i32> %a) nounwind { ; ; SSE41-LABEL: extractelement_v4i32_3: ; SSE41: # BB#0: -; SSE41-NEXT: pextrd $3, %xmm0, %eax +; SSE41-NEXT: extractps $3, %xmm0, %eax ; SSE41-NEXT: retq ; ; AVX-LABEL: extractelement_v4i32_3: ; AVX: # BB#0: -; AVX-NEXT: vpextrd $3, 
%xmm0, %eax +; AVX-NEXT: vextractps $3, %xmm0, %eax ; AVX-NEXT: retq %b = extractelement <4 x i32> %a, i256 3 ret i32 %b @@ -297,22 +297,15 @@ define i32 @extractelement_v8i32_7(<8 x i32> %a) nounwind { ; ; SSE41-LABEL: extractelement_v8i32_7: ; SSE41: # BB#0: -; SSE41-NEXT: pextrd $3, %xmm1, %eax +; SSE41-NEXT: extractps $3, %xmm1, %eax ; SSE41-NEXT: retq ; -; AVX1-LABEL: extractelement_v8i32_7: -; AVX1: # BB#0: -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX1-NEXT: vpextrd $3, %xmm0, %eax -; AVX1-NEXT: vzeroupper -; AVX1-NEXT: retq -; -; AVX2-LABEL: extractelement_v8i32_7: -; AVX2: # BB#0: -; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 -; AVX2-NEXT: vpextrd $3, %xmm0, %eax -; AVX2-NEXT: vzeroupper -; AVX2-NEXT: retq +; AVX-LABEL: extractelement_v8i32_7: +; AVX: # BB#0: +; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX-NEXT: vextractps $3, %xmm0, %eax +; AVX-NEXT: vzeroupper +; AVX-NEXT: retq %b = extractelement <8 x i32> %a, i64 7 ret i32 %b } diff --git a/test/CodeGen/X86/known-signbits-vector.ll b/test/CodeGen/X86/known-signbits-vector.ll index ec620b8ce87..7506062b615 100644 --- a/test/CodeGen/X86/known-signbits-vector.ll +++ b/test/CodeGen/X86/known-signbits-vector.ll @@ -76,7 +76,7 @@ define float @signbits_ashr_extract_sitofp(<2 x i64> %a0) nounwind { ; X32-LABEL: signbits_ashr_extract_sitofp: ; X32: # BB#0: ; X32-NEXT: pushl %eax -; X32-NEXT: vpextrd $1, %xmm0, %eax +; X32-NEXT: vextractps $1, %xmm0, %eax ; X32-NEXT: vcvtsi2ssl %eax, %xmm1, %xmm0 ; X32-NEXT: vmovss %xmm0, (%esp) ; X32-NEXT: flds (%esp) diff --git a/test/CodeGen/X86/nontemporal-2.ll b/test/CodeGen/X86/nontemporal-2.ll index 9a7897edbea..b6f2314b31e 100644 --- a/test/CodeGen/X86/nontemporal-2.ll +++ b/test/CodeGen/X86/nontemporal-2.ll @@ -541,19 +541,19 @@ define void @test_extract_i32(<4 x i32> %arg, i32* %dst) { ; ; SSE41-LABEL: test_extract_i32: ; SSE41: # BB#0: -; SSE41-NEXT: pextrd $1, %xmm0, %eax +; SSE41-NEXT: extractps $1, %xmm0, %eax ; SSE41-NEXT: movntil %eax, (%rdi) ; SSE41-NEXT: 
retq ; ; AVX-LABEL: test_extract_i32: ; AVX: # BB#0: -; AVX-NEXT: vpextrd $1, %xmm0, %eax +; AVX-NEXT: vextractps $1, %xmm0, %eax ; AVX-NEXT: movntil %eax, (%rdi) ; AVX-NEXT: retq ; ; VLX-LABEL: test_extract_i32: ; VLX: # BB#0: -; VLX-NEXT: vpextrd $1, %xmm0, %eax +; VLX-NEXT: vextractps $1, %xmm0, %eax ; VLX-NEXT: movntil %eax, (%rdi) ; VLX-NEXT: retq %1 = extractelement <4 x i32> %arg, i32 1 diff --git a/test/CodeGen/X86/oddshuffles.ll b/test/CodeGen/X86/oddshuffles.ll index 573111d5d25..02a399b4898 100644 --- a/test/CodeGen/X86/oddshuffles.ll +++ b/test/CodeGen/X86/oddshuffles.ll @@ -112,10 +112,10 @@ define void @v3i32(<2 x i32> %a, <2 x i32> %b, <3 x i32>* %p) nounwind { ; ; AVX2-LABEL: v3i32: ; AVX2: # BB#0: -; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1 -; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2,3] -; AVX2-NEXT: vpextrd $2, %xmm0, 8(%rdi) -; AVX2-NEXT: vmovq %xmm1, (%rdi) +; AVX2-NEXT: vbroadcastss %xmm1, %xmm1 +; AVX2-NEXT: vblendps {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2,3] +; AVX2-NEXT: vextractps $2, %xmm0, 8(%rdi) +; AVX2-NEXT: vmovlps %xmm1, (%rdi) ; AVX2-NEXT: retq ; ; XOP-LABEL: v3i32: @@ -199,18 +199,18 @@ define void @v5i32(<4 x i32> %a, <4 x i32> %b, <5 x i32>* %p) nounwind { ; AVX1: # BB#0: ; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[0,1],xmm1[1,2] ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,2,1,3] -; AVX1-NEXT: vpextrd $3, %xmm0, 16(%rdi) +; AVX1-NEXT: vextractps $3, %xmm0, 16(%rdi) ; AVX1-NEXT: vmovaps %xmm1, (%rdi) ; AVX1-NEXT: retq ; ; AVX2-LABEL: v5i32: ; AVX2: # BB#0: ; AVX2-NEXT: # kill: %XMM0 %XMM0 %YMM0 -; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 -; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,5,1,6,3,u,u,u> -; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1 -; AVX2-NEXT: vpextrd $3, %xmm0, 16(%rdi) -; AVX2-NEXT: vmovdqa %xmm1, (%rdi) +; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 +; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <0,5,1,6,3,u,u,u> +; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1 +; AVX2-NEXT: vextractps $3, %xmm0, 16(%rdi) +; AVX2-NEXT: 
vmovaps %xmm1, (%rdi) ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; @@ -218,7 +218,7 @@ define void @v5i32(<4 x i32> %a, <4 x i32> %b, <5 x i32>* %p) nounwind { ; XOP: # BB#0: ; XOP-NEXT: vshufps {{.*#+}} xmm1 = xmm0[0,1],xmm1[1,2] ; XOP-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,2,1,3] -; XOP-NEXT: vpextrd $3, %xmm0, 16(%rdi) +; XOP-NEXT: vextractps $3, %xmm0, 16(%rdi) ; XOP-NEXT: vmovaps %xmm1, (%rdi) ; XOP-NEXT: retq %r = shufflevector <4 x i32> %a, <4 x i32> %b, <5 x i32> <i32 0, i32 5, i32 1, i32 6, i32 3> diff --git a/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll b/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll index acf91cbeea1..b35c9766c16 100644 --- a/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll +++ b/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll @@ -440,12 +440,12 @@ define i32 @test_mm_extract_epi8(<2 x i64> %a0) { define i32 @test_mm_extract_epi32(<2 x i64> %a0) { ; X32-LABEL: test_mm_extract_epi32: ; X32: # BB#0: -; X32-NEXT: pextrd $1, %xmm0, %eax +; X32-NEXT: extractps $1, %xmm0, %eax ; X32-NEXT: retl ; ; X64-LABEL: test_mm_extract_epi32: ; X64: # BB#0: -; X64-NEXT: pextrd $1, %xmm0, %eax +; X64-NEXT: extractps $1, %xmm0, %eax ; X64-NEXT: retq %arg0 = bitcast <2 x i64> %a0 to <4 x i32> %ext = extractelement <4 x i32> %arg0, i32 1 @@ -455,8 +455,8 @@ define i32 @test_mm_extract_epi32(<2 x i64> %a0) { define i64 @test_mm_extract_epi64(<2 x i64> %a0) { ; X32-LABEL: test_mm_extract_epi64: ; X32: # BB#0: -; X32-NEXT: pextrd $2, %xmm0, %eax -; X32-NEXT: pextrd $3, %xmm0, %edx +; X32-NEXT: extractps $2, %xmm0, %eax +; X32-NEXT: extractps $3, %xmm0, %edx ; X32-NEXT: retl ; ; X64-LABEL: test_mm_extract_epi64: diff --git a/test/CodeGen/X86/sse41-schedule.ll b/test/CodeGen/X86/sse41-schedule.ll index 56f2dd4ff1c..ab163bd2e99 100644 --- a/test/CodeGen/X86/sse41-schedule.ll +++ b/test/CodeGen/X86/sse41-schedule.ll @@ -949,61 +949,71 @@ define i32 @test_pextrb(<16 x i8> %a0, i8 *%a1) { define i32 @test_pextrd(<4 x i32> %a0, i32 *%a1) { ; GENERIC-LABEL: test_pextrd: ; GENERIC: # BB#0: +; GENERIC-NEXT:
paddd %xmm0, %xmm0 # sched: [1:0.50] ; GENERIC-NEXT: pextrd $3, %xmm0, %eax # sched: [3:1.00] ; GENERIC-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: test_pextrd: ; SLM: # BB#0: +; SLM-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.50] ; SLM-NEXT: pextrd $3, %xmm0, %eax # sched: [1:1.00] ; SLM-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [4:2.00] ; SLM-NEXT: retq # sched: [4:1.00] ; ; SANDY-LABEL: test_pextrd: ; SANDY: # BB#0: +; SANDY-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpextrd $3, %xmm0, %eax # sched: [3:1.00] ; SANDY-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pextrd: ; HASWELL: # BB#0: +; HASWELL-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpextrd $3, %xmm0, %eax # sched: [2:1.00] ; HASWELL-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BROADWELL-LABEL: test_pextrd: ; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vpextrd $3, %xmm0, %eax # sched: [2:1.00] ; BROADWELL-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [1:1.00] ; BROADWELL-NEXT: retq # sched: [2:1.00] ; ; SKYLAKE-LABEL: test_pextrd: ; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: vpextrd $3, %xmm0, %eax # sched: [3:1.00] ; SKYLAKE-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [2:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pextrd: ; SKX: # BB#0: +; SKX-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: vpextrd $3, %xmm0, %eax # sched: [3:1.00] ; SKX-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [2:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pextrd: ; BTVER2: # BB#0: +; BTVER2-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpextrd $3, %xmm0, %eax # sched: [1:0.50] ; BTVER2-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [6:1.00] ; BTVER2-NEXT: retq 
# sched: [4:1.00] ; ; ZNVER1-LABEL: test_pextrd: ; ZNVER1: # BB#0: +; ZNVER1-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.25] ; ZNVER1-NEXT: vpextrd $3, %xmm0, %eax # sched: [1:0.25] ; ZNVER1-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [8:1.00] ; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = extractelement <4 x i32> %a0, i32 3 - %2 = extractelement <4 x i32> %a0, i32 1 - store i32 %2, i32 *%a1 - ret i32 %1 + %1 = add <4 x i32> %a0, %a0 + %2 = extractelement <4 x i32> %1, i32 3 + %3 = extractelement <4 x i32> %1, i32 1 + store i32 %3, i32 *%a1 + ret i32 %2 } define i64 @test_pextrq(<2 x i64> %a0, <2 x i64> %a1, i64 *%a2) { diff --git a/test/CodeGen/X86/sse41.ll b/test/CodeGen/X86/sse41.ll index 9f30767b10d..98ddd6d7f13 100644 --- a/test/CodeGen/X86/sse41.ll +++ b/test/CodeGen/X86/sse41.ll @@ -108,6 +108,7 @@ define float @ext_1(<4 x float> %v) nounwind { %t = fadd float %s, 1.0 ret float %t } + define float @ext_2(<4 x float> %v) nounwind { ; X32-LABEL: ext_2: ; X32: ## BB#0: @@ -125,15 +126,16 @@ define float @ext_2(<4 x float> %v) nounwind { %s = extractelement <4 x float> %v, i32 3 ret float %s } + define i32 @ext_3(<4 x i32> %v) nounwind { ; X32-LABEL: ext_3: ; X32: ## BB#0: -; X32-NEXT: pextrd $3, %xmm0, %eax +; X32-NEXT: extractps $3, %xmm0, %eax ; X32-NEXT: retl ; ; X64-LABEL: ext_3: ; X64: ## BB#0: -; X64-NEXT: pextrd $3, %xmm0, %eax +; X64-NEXT: extractps $3, %xmm0, %eax ; X64-NEXT: retq %i = extractelement <4 x i32> %v, i32 3 ret i32 %i @@ -261,7 +263,6 @@ define i32 @ptestz_3(<2 x i64> %t1, <2 x i64> %t2) nounwind { ret i32 %tmp1 } - declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone diff --git a/test/CodeGen/X86/widen_load-3.ll b/test/CodeGen/X86/widen_load-3.ll index 3fd618fa678..bc36c5fbd57 100644 --- a/test/CodeGen/X86/widen_load-3.ll +++ b/test/CodeGen/X86/widen_load-3.ll @@ -25,33 
+25,19 @@ define <7 x i64> @load7_aligned(<7 x i64>* %x) { ; X86-SSE-NEXT: movaps %xmm0, (%eax) ; X86-SSE-NEXT: retl $4 ; -; X86-AVX1-LABEL: load7_aligned: -; X86-AVX1: # BB#0: -; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-AVX1-NEXT: vmovaps (%ecx), %ymm0 -; X86-AVX1-NEXT: vmovaps 32(%ecx), %ymm1 -; X86-AVX1-NEXT: vmovaps %ymm0, (%eax) -; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0 -; X86-AVX1-NEXT: vpextrd $1, %xmm0, 52(%eax) -; X86-AVX1-NEXT: vmovd %xmm0, 48(%eax) -; X86-AVX1-NEXT: vmovaps %xmm1, 32(%eax) -; X86-AVX1-NEXT: vzeroupper -; X86-AVX1-NEXT: retl $4 -; -; X86-AVX2-LABEL: load7_aligned: -; X86-AVX2: # BB#0: -; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-AVX2-NEXT: vmovaps (%ecx), %ymm0 -; X86-AVX2-NEXT: vmovdqa 32(%ecx), %ymm1 -; X86-AVX2-NEXT: vmovaps %ymm0, (%eax) -; X86-AVX2-NEXT: vextracti128 $1, %ymm1, %xmm0 -; X86-AVX2-NEXT: vpextrd $1, %xmm0, 52(%eax) -; X86-AVX2-NEXT: vmovd %xmm0, 48(%eax) -; X86-AVX2-NEXT: vmovdqa %xmm1, 32(%eax) -; X86-AVX2-NEXT: vzeroupper -; X86-AVX2-NEXT: retl $4 +; X86-AVX-LABEL: load7_aligned: +; X86-AVX: # BB#0: +; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-AVX-NEXT: vmovaps (%ecx), %ymm0 +; X86-AVX-NEXT: vmovaps 32(%ecx), %ymm1 +; X86-AVX-NEXT: vmovaps %ymm0, (%eax) +; X86-AVX-NEXT: vextractf128 $1, %ymm1, %xmm0 +; X86-AVX-NEXT: vextractps $1, %xmm0, 52(%eax) +; X86-AVX-NEXT: vmovss %xmm0, 48(%eax) +; X86-AVX-NEXT: vmovaps %xmm1, 32(%eax) +; X86-AVX-NEXT: vzeroupper +; X86-AVX-NEXT: retl $4 ; ; X64-SSE-LABEL: load7_aligned: ; X64-SSE: # BB#0: