From 2c72e2a32cb3198e60667d495421bcb2a1eabe1b Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Wed, 16 Nov 2016 17:42:40 +0000 Subject: [PATCH] [x86] add fake scalar FP logic instructions to ReplaceableInstrs to save some bytes We can replace "scalar" FP-bitwise-logic with other forms of bitwise-logic instructions. Scalar SSE/AVX FP-logic instructions only exist in your imagination and/or the bowels of compilers, but logically equivalent int, float, and double variants of bitwise-logic instructions are reality in x86, and the float variant may be a shorter instruction depending on which flavor (SSE or AVX) of vector ISA you have...so just prefer float all the time. This is a preliminary step towards solving PR6137: https://llvm.org/bugs/show_bug.cgi?id=6137 Differential Revision: https://reviews.llvm.org/D26712 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@287122 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrInfo.cpp | 8 ++ test/CodeGen/X86/fast-isel-select-sse.ll | 80 ++++++++-------- test/CodeGen/X86/fp-logic-replace.ll | 18 ++-- test/CodeGen/X86/fp-logic.ll | 2 +- test/CodeGen/X86/fp-select-cmp-and.ll | 32 +++---- test/CodeGen/X86/sse-minmax.ll | 112 +++++++++++------------ 6 files changed, 130 insertions(+), 122 deletions(-) diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index c4a9684b3e5..cf12b3665a5 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -7690,12 +7690,16 @@ static const uint16_t ReplaceableInstrs[][3] = { { X86::MOVNTPSmr, X86::MOVNTPDmr, X86::MOVNTDQmr }, { X86::ANDNPSrm, X86::ANDNPDrm, X86::PANDNrm }, { X86::ANDNPSrr, X86::ANDNPDrr, X86::PANDNrr }, + { X86::FsANDNPSrr, X86::FsANDNPDrr,X86::PANDNrr }, { X86::ANDPSrm, X86::ANDPDrm, X86::PANDrm }, { X86::ANDPSrr, X86::ANDPDrr, X86::PANDrr }, + { X86::FsANDPSrr, X86::FsANDPDrr, X86::PANDrr }, { X86::ORPSrm, X86::ORPDrm, X86::PORrm }, { X86::ORPSrr, X86::ORPDrr, X86::PORrr }, + { X86::FsORPSrr, X86::FsORPDrr, X86::PORrr }, { X86::XORPSrm, X86::XORPDrm, X86::PXORrm }, { X86::XORPSrr, X86::XORPDrr, X86::PXORrr }, + { X86::FsXORPSrr, X86::FsXORPDrr, X86::PXORrr }, // AVX 128-bit support { X86::VMOVAPSmr, X86::VMOVAPDmr, X86::VMOVDQAmr }, { X86::VMOVAPSrm, X86::VMOVAPDrm, X86::VMOVDQArm }, @@ -7706,12 +7710,16 @@ static const uint16_t ReplaceableInstrs[][3] = { { X86::VMOVNTPSmr, X86::VMOVNTPDmr, X86::VMOVNTDQmr }, { X86::VANDNPSrm, X86::VANDNPDrm, X86::VPANDNrm }, { X86::VANDNPSrr, X86::VANDNPDrr, X86::VPANDNrr }, + { X86::VFsANDNPSrr,X86::VFsANDNPDrr,X86::VPANDNrr }, { X86::VANDPSrm, X86::VANDPDrm, X86::VPANDrm }, { X86::VANDPSrr, X86::VANDPDrr, X86::VPANDrr }, + { X86::VFsANDPSrr, X86::VFsANDPDrr, X86::VPANDrr }, { X86::VORPSrm, X86::VORPDrm, X86::VPORrm }, { X86::VORPSrr, X86::VORPDrr, X86::VPORrr }, + { X86::VFsORPSrr, X86::VFsORPDrr, X86::VPORrr }, { X86::VXORPSrm, X86::VXORPDrm, X86::VPXORrm }, { X86::VXORPSrr, X86::VXORPDrr, X86::VPXORrr }, + { X86::VFsXORPSrr, X86::VFsXORPDrr, X86::VPXORrr }, // AVX 256-bit support { X86::VMOVAPSYmr, X86::VMOVAPDYmr, X86::VMOVDQAYmr }, { X86::VMOVAPSYrm, X86::VMOVAPDYrm, X86::VMOVDQAYrm }, diff --git a/test/CodeGen/X86/fast-isel-select-sse.ll b/test/CodeGen/X86/fast-isel-select-sse.ll index 033fc23ce91..026732d8ce5 100644 --- a/test/CodeGen/X86/fast-isel-select-sse.ll +++ b/test/CodeGen/X86/fast-isel-select-sse.ll @@ -30,9 +30,9 @@ define double @select_fcmp_oeq_f64(double %a, double %b, double %c, double %d) { ; SSE-LABEL: select_fcmp_oeq_f64: ; SSE: # BB#0: ; SSE-NEXT: cmpeqsd %xmm1, %xmm0 -; SSE-NEXT: andpd %xmm0, %xmm2 -; SSE-NEXT: andnpd %xmm3, %xmm0 -; SSE-NEXT: orpd %xmm2, %xmm0 +; SSE-NEXT: andps %xmm0, %xmm2 +; SSE-NEXT: andnps %xmm3, %xmm0 +; SSE-NEXT: orps %xmm2, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: select_fcmp_oeq_f64: @@ -71,10 +71,10 @@ define double @select_fcmp_ogt_f64(double %a, double %b, double %c, double %d) { ; SSE-LABEL: select_fcmp_ogt_f64: ; SSE: # BB#0: ; SSE-NEXT: cmpltsd %xmm0, %xmm1 -; SSE-NEXT: andpd %xmm1, %xmm2 -; SSE-NEXT: andnpd %xmm3, %xmm1 -; SSE-NEXT: orpd %xmm2, %xmm1 -; SSE-NEXT: movapd %xmm1, %xmm0 +; SSE-NEXT: andps %xmm1, %xmm2 +; SSE-NEXT: andnps %xmm3, %xmm1 +; SSE-NEXT: orps %xmm2, %xmm1 +; SSE-NEXT: movaps %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: select_fcmp_ogt_f64: @@ -113,10 +113,10 @@ define double @select_fcmp_oge_f64(double %a, double %b, double %c, double %d) { ; SSE-LABEL: select_fcmp_oge_f64: ; SSE: # BB#0: ; SSE-NEXT: cmplesd %xmm0, %xmm1 -; SSE-NEXT: andpd %xmm1, %xmm2 -; SSE-NEXT: andnpd %xmm3, %xmm1 -; SSE-NEXT: orpd %xmm2, %xmm1 -; SSE-NEXT: movapd %xmm1, %xmm0 +; SSE-NEXT: andps %xmm1, %xmm2 +; SSE-NEXT: andnps %xmm3, %xmm1 +; SSE-NEXT: orps %xmm2, %xmm1 +; SSE-NEXT: movaps %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: select_fcmp_oge_f64: @@ -154,9 +154,9 @@ define double @select_fcmp_olt_f64(double %a, double %b, double %c, double %d) { ; SSE-LABEL: select_fcmp_olt_f64: ; SSE: # BB#0: ; SSE-NEXT: cmpltsd %xmm1, %xmm0 -; SSE-NEXT: andpd %xmm0, %xmm2 -; SSE-NEXT: andnpd %xmm3, %xmm0 -; SSE-NEXT: orpd %xmm2, %xmm0 +; SSE-NEXT: andps %xmm0, %xmm2 +; SSE-NEXT: andnps %xmm3, %xmm0 +; SSE-NEXT: orps %xmm2, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: select_fcmp_olt_f64: @@ -194,9 +194,9 @@ define double @select_fcmp_ole_f64(double %a, double %b, double %c, double %d) { ; SSE-LABEL: select_fcmp_ole_f64: ; SSE: # BB#0: ; SSE-NEXT: cmplesd %xmm1, %xmm0 -; SSE-NEXT: andpd %xmm0, %xmm2 -; SSE-NEXT: andnpd %xmm3, %xmm0 -; SSE-NEXT: orpd %xmm2, %xmm0 +; SSE-NEXT: andps %xmm0, %xmm2 +; SSE-NEXT: andnps %xmm3, %xmm0 +; SSE-NEXT: orps %xmm2, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: select_fcmp_ole_f64: @@ -234,9 +234,9 @@ define double @select_fcmp_ord_f64(double %a, double %b, double %c, double %d) { ; SSE-LABEL: select_fcmp_ord_f64: ; SSE: # BB#0: ; SSE-NEXT: cmpordsd %xmm1, %xmm0 -; SSE-NEXT: andpd %xmm0, %xmm2 -; SSE-NEXT: andnpd %xmm3, %xmm0 -; SSE-NEXT: orpd %xmm2, %xmm0 +; SSE-NEXT: andps %xmm0, %xmm2 +; SSE-NEXT: andnps %xmm3, %xmm0 +; SSE-NEXT: orps %xmm2, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: select_fcmp_ord_f64: @@ -274,9 +274,9 @@ define double @select_fcmp_uno_f64(double %a, double %b, double %c, double %d) { ; SSE-LABEL: select_fcmp_uno_f64: ; SSE: # BB#0: ; SSE-NEXT: cmpunordsd %xmm1, %xmm0 -; SSE-NEXT: andpd %xmm0, %xmm2 -; SSE-NEXT: andnpd %xmm3, %xmm0 -; SSE-NEXT: orpd %xmm2, %xmm0 +; SSE-NEXT: andps %xmm0, %xmm2 +; SSE-NEXT: andnps %xmm3, %xmm0 +; SSE-NEXT: orps %xmm2, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: select_fcmp_uno_f64: @@ -314,9 +314,9 @@ define double @select_fcmp_ugt_f64(double %a, double %b, double %c, double %d) { ; SSE-LABEL: select_fcmp_ugt_f64: ; SSE: # BB#0: ; SSE-NEXT: cmpnlesd %xmm1, %xmm0 -; SSE-NEXT: andpd %xmm0, %xmm2 -; SSE-NEXT: andnpd %xmm3, %xmm0 -; SSE-NEXT: orpd %xmm2, %xmm0 +; SSE-NEXT: andps %xmm0, %xmm2 +; SSE-NEXT: andnps %xmm3, %xmm0 +; SSE-NEXT: orps %xmm2, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: select_fcmp_ugt_f64: @@ -354,9 +354,9 @@ define double @select_fcmp_uge_f64(double %a, double %b, double %c, double %d) { ; SSE-LABEL: select_fcmp_uge_f64: ; SSE: # BB#0: ; SSE-NEXT: cmpnltsd %xmm1, %xmm0 -; SSE-NEXT: andpd %xmm0, %xmm2 -; SSE-NEXT: andnpd %xmm3, %xmm0 -; SSE-NEXT: orpd %xmm2, %xmm0 +; SSE-NEXT: andps %xmm0, %xmm2 +; SSE-NEXT: andnps %xmm3, %xmm0 +; SSE-NEXT: orps %xmm2, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: select_fcmp_uge_f64: @@ -395,10 +395,10 @@ define double @select_fcmp_ult_f64(double %a, double %b, double %c, double %d) { ; SSE-LABEL: select_fcmp_ult_f64: ; SSE: # BB#0: ; SSE-NEXT: cmpnlesd %xmm0, %xmm1 -; SSE-NEXT: andpd %xmm1, %xmm2 -; SSE-NEXT: andnpd %xmm3, %xmm1 -; SSE-NEXT: orpd %xmm2, %xmm1 -; SSE-NEXT: movapd %xmm1, %xmm0 +; SSE-NEXT: andps %xmm1, %xmm2 +; SSE-NEXT: andnps %xmm3, %xmm1 +; SSE-NEXT: orps %xmm2, %xmm1 +; SSE-NEXT: movaps %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: select_fcmp_ult_f64: @@ -437,10 +437,10 @@ define double @select_fcmp_ule_f64(double %a, double %b, double %c, double %d) { ; SSE-LABEL: select_fcmp_ule_f64: ; SSE: # BB#0: ; SSE-NEXT: cmpnltsd %xmm0, %xmm1 -; SSE-NEXT: andpd %xmm1, %xmm2 -; SSE-NEXT: andnpd %xmm3, %xmm1 -; SSE-NEXT: orpd %xmm2, %xmm1 -; SSE-NEXT: movapd %xmm1, %xmm0 +; SSE-NEXT: andps %xmm1, %xmm2 +; SSE-NEXT: andnps %xmm3, %xmm1 +; SSE-NEXT: orps %xmm2, %xmm1 +; SSE-NEXT: movaps %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: select_fcmp_ule_f64: @@ -478,9 +478,9 @@ define double @select_fcmp_une_f64(double %a, double %b, double %c, double %d) { ; SSE-LABEL: select_fcmp_une_f64: ; SSE: # BB#0: ; SSE-NEXT: cmpneqsd %xmm1, %xmm0 -; SSE-NEXT: andpd %xmm0, %xmm2 -; SSE-NEXT: andnpd %xmm3, %xmm0 -; SSE-NEXT: orpd %xmm2, %xmm0 +; SSE-NEXT: andps %xmm0, %xmm2 +; SSE-NEXT: andnps %xmm3, %xmm0 +; SSE-NEXT: orps %xmm2, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: select_fcmp_une_f64: diff --git a/test/CodeGen/X86/fp-logic-replace.ll b/test/CodeGen/X86/fp-logic-replace.ll index 8f2f649ce70..47e07688702 100644 --- a/test/CodeGen/X86/fp-logic-replace.ll +++ b/test/CodeGen/X86/fp-logic-replace.ll @@ -3,20 +3,20 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX ; Test that we can replace "scalar" FP-bitwise-logic with the optimal instruction. -; Scalar x86 FP-logic instructions only exist in your imagination and/or the bowels +; Scalar x86 FP-logic instructions only exist in your imagination and/or the bowels ; of compilers, but float and double variants of FP-logic instructions are reality -; and float may be a shorter instruction depending on which flavor of vector ISA -; you have...so just prefer float all the time, ok? Yay, x86! +; and float may be a shorter instruction depending on which flavor of vector ISA +; you have...so just prefer float all the time, ok? Yay, x86! define double @FsANDPSrr(double %x, double %y) { ; SSE-LABEL: FsANDPSrr: ; SSE: # BB#0: -; SSE-NEXT: andpd %xmm1, %xmm0 +; SSE-NEXT: andps %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: FsANDPSrr: ; AVX: # BB#0: -; AVX-NEXT: vandpd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vandps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq ; %bc1 = bitcast double %x to i64 @@ -56,12 +56,12 @@ define double @FsANDNPSrr(double %x, double %y) { define double @FsORPSrr(double %x, double %y) { ; SSE-LABEL: FsORPSrr: ; SSE: # BB#0: -; SSE-NEXT: orpd %xmm1, %xmm0 +; SSE-NEXT: orps %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: FsORPSrr: ; AVX: # BB#0: -; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq ; %bc1 = bitcast double %x to i64 @@ -74,12 +74,12 @@ define double @FsORPSrr(double %x, double %y) { define double @FsXORPSrr(double %x, double %y) { ; SSE-LABEL: FsXORPSrr: ; SSE: # BB#0: -; SSE-NEXT: xorpd %xmm1, %xmm0 +; SSE-NEXT: xorps %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: FsXORPSrr: ; AVX: # BB#0: -; AVX-NEXT: vxorpd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vxorps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq ; %bc1 = bitcast double %x to i64 diff --git a/test/CodeGen/X86/fp-logic.ll b/test/CodeGen/X86/fp-logic.ll index 40bab00e1ab..2c6698fb120 100644 --- a/test/CodeGen/X86/fp-logic.ll +++ b/test/CodeGen/X86/fp-logic.ll @@ -223,7 +223,7 @@ define float @f7_xor(float %x) { define double @doubles(double %x, double %y) { ; CHECK-LABEL: doubles: ; CHECK: # BB#0: -; CHECK-NEXT: andpd %xmm1, %xmm0 +; CHECK-NEXT: andps %xmm1, %xmm0 ; CHECK-NEXT: retq ; %bc1 = bitcast double %x to i64 diff --git a/test/CodeGen/X86/fp-select-cmp-and.ll b/test/CodeGen/X86/fp-select-cmp-and.ll index e012809cf48..c9c8922c97f 100644 --- a/test/CodeGen/X86/fp-select-cmp-and.ll +++ b/test/CodeGen/X86/fp-select-cmp-and.ll @@ -5,7 +5,7 @@ define double @test1(double %a, double %b, double %eps) { ; CHECK-LABEL: test1: ; CHECK: # BB#0: ; CHECK-NEXT: cmpltsd %xmm2, %xmm0 -; CHECK-NEXT: andpd %xmm1, %xmm0 +; CHECK-NEXT: andps %xmm1, %xmm0 ; CHECK-NEXT: retq ; %cmp = fcmp olt double %a, %eps @@ -17,7 +17,7 @@ define double @test2(double %a, double %b, double %eps) { ; CHECK-LABEL: test2: ; CHECK: # BB#0: ; CHECK-NEXT: cmplesd %xmm2, %xmm0 -; CHECK-NEXT: andpd %xmm1, %xmm0 +; CHECK-NEXT: andps %xmm1, %xmm0 ; CHECK-NEXT: retq ; %cmp = fcmp ole double %a, %eps @@ -29,8 +29,8 @@ define double @test3(double %a, double %b, double %eps) { ; CHECK-LABEL: test3: ; CHECK: # BB#0: ; CHECK-NEXT: cmpltsd %xmm0, %xmm2 -; CHECK-NEXT: andpd %xmm1, %xmm2 -; CHECK-NEXT: movapd %xmm2, %xmm0 +; CHECK-NEXT: andps %xmm1, %xmm2 +; CHECK-NEXT: movaps %xmm2, %xmm0 ; CHECK-NEXT: retq ; %cmp = fcmp ogt double %a, %eps @@ -42,8 +42,8 @@ define double @test4(double %a, double %b, double %eps) { ; CHECK-LABEL: test4: ; CHECK: # BB#0: ; CHECK-NEXT: cmplesd %xmm0, %xmm2 -; CHECK-NEXT: andpd %xmm1, %xmm2 -; CHECK-NEXT: movapd %xmm2, %xmm0 +; CHECK-NEXT: andps %xmm1, %xmm2 +; CHECK-NEXT: movaps %xmm2, %xmm0 ; CHECK-NEXT: retq ; %cmp = fcmp oge double %a, %eps @@ -55,7 +55,7 @@ define double @test5(double %a, double %b, double %eps) { ; CHECK-LABEL: test5: ; CHECK: # BB#0: ; CHECK-NEXT: cmpltsd %xmm2, %xmm0 -; CHECK-NEXT: andnpd %xmm1, %xmm0 +; CHECK-NEXT: andnps %xmm1, %xmm0 ; CHECK-NEXT: retq ; %cmp = fcmp olt double %a, %eps @@ -67,7 +67,7 @@ define double @test6(double %a, double %b, double %eps) { ; CHECK-LABEL: test6: ; CHECK: # BB#0: ; CHECK-NEXT: cmplesd %xmm2, %xmm0 -; CHECK-NEXT: andnpd %xmm1, %xmm0 +; CHECK-NEXT: andnps %xmm1, %xmm0 ; CHECK-NEXT: retq ; %cmp = fcmp ole double %a, %eps @@ -79,8 +79,8 @@ define double @test7(double %a, double %b, double %eps) { ; CHECK-LABEL: test7: ; CHECK: # BB#0: ; CHECK-NEXT: cmpltsd %xmm0, %xmm2 -; CHECK-NEXT: andnpd %xmm1, %xmm2 -; CHECK-NEXT: movapd %xmm2, %xmm0 +; CHECK-NEXT: andnps %xmm1, %xmm2 +; CHECK-NEXT: movaps %xmm2, %xmm0 ; CHECK-NEXT: retq ; %cmp = fcmp ogt double %a, %eps @@ -92,8 +92,8 @@ define double @test8(double %a, double %b, double %eps) { ; CHECK-LABEL: test8: ; CHECK: # BB#0: ; CHECK-NEXT: cmplesd %xmm0, %xmm2 -; CHECK-NEXT: andnpd %xmm1, %xmm2 -; CHECK-NEXT: movapd %xmm2, %xmm0 +; CHECK-NEXT: andnps %xmm1, %xmm2 +; CHECK-NEXT: movaps %xmm2, %xmm0 ; CHECK-NEXT: retq ; %cmp = fcmp oge double %a, %eps @@ -220,10 +220,10 @@ define double @test18(double %a, double %b, double %c, double %eps) { ; CHECK-LABEL: test18: ; CHECK: # BB#0: ; CHECK-NEXT: cmplesd %xmm0, %xmm3 -; CHECK-NEXT: andpd %xmm3, %xmm2 -; CHECK-NEXT: andnpd %xmm1, %xmm3 -; CHECK-NEXT: orpd %xmm2, %xmm3 -; CHECK-NEXT: movapd %xmm3, %xmm0 +; CHECK-NEXT: andps %xmm3, %xmm2 +; CHECK-NEXT: andnps %xmm1, %xmm3 +; CHECK-NEXT: orps %xmm2, %xmm3 +; CHECK-NEXT: movaps %xmm3, %xmm0 ; CHECK-NEXT: retq ; %cmp = fcmp oge double %a, %eps diff --git a/test/CodeGen/X86/sse-minmax.ll b/test/CodeGen/X86/sse-minmax.ll index c7a170d2a68..b9d9e318920 100644 --- a/test/CodeGen/X86/sse-minmax.ll +++ b/test/CodeGen/X86/sse-minmax.ll @@ -84,11 +84,11 @@ define double @olt_inverse(double %x, double %y) { define double @oge(double %x, double %y) { ; STRICT-LABEL: oge: ; STRICT: # BB#0: -; STRICT-NEXT: movapd %xmm1, %xmm2 +; STRICT-NEXT: movaps %xmm1, %xmm2 ; STRICT-NEXT: cmplesd %xmm0, %xmm2 -; STRICT-NEXT: andpd %xmm2, %xmm0 -; STRICT-NEXT: andnpd %xmm1, %xmm2 -; STRICT-NEXT: orpd %xmm2, %xmm0 +; STRICT-NEXT: andps %xmm2, %xmm0 +; STRICT-NEXT: andnps %xmm1, %xmm2 +; STRICT-NEXT: orps %xmm2, %xmm0 ; STRICT-NEXT: retq ; ; RELAX-LABEL: oge: @@ -104,12 +104,12 @@ define double @oge(double %x, double %y) { define double @ole(double %x, double %y) { ; STRICT-LABEL: ole: ; STRICT: # BB#0: -; STRICT-NEXT: movapd %xmm0, %xmm2 +; STRICT-NEXT: movaps %xmm0, %xmm2 ; STRICT-NEXT: cmplesd %xmm1, %xmm2 -; STRICT-NEXT: andpd %xmm2, %xmm0 -; STRICT-NEXT: andnpd %xmm1, %xmm2 -; STRICT-NEXT: orpd %xmm0, %xmm2 -; STRICT-NEXT: movapd %xmm2, %xmm0 +; STRICT-NEXT: andps %xmm2, %xmm0 +; STRICT-NEXT: andnps %xmm1, %xmm2 +; STRICT-NEXT: orps %xmm0, %xmm2 +; STRICT-NEXT: movaps %xmm2, %xmm0 ; STRICT-NEXT: retq ; ; RELAX-LABEL: ole: @@ -125,12 +125,12 @@ define double @ole(double %x, double %y) { define double @oge_inverse(double %x, double %y) { ; STRICT-LABEL: oge_inverse: ; STRICT: # BB#0: -; STRICT-NEXT: movapd %xmm1, %xmm2 +; STRICT-NEXT: movaps %xmm1, %xmm2 ; STRICT-NEXT: cmplesd %xmm0, %xmm2 -; STRICT-NEXT: andpd %xmm2, %xmm1 -; STRICT-NEXT: andnpd %xmm0, %xmm2 -; STRICT-NEXT: orpd %xmm1, %xmm2 -; STRICT-NEXT: movapd %xmm2, %xmm0 +; STRICT-NEXT: andps %xmm2, %xmm1 +; STRICT-NEXT: andnps %xmm0, %xmm2 +; STRICT-NEXT: orps %xmm1, %xmm2 +; STRICT-NEXT: movaps %xmm2, %xmm0 ; STRICT-NEXT: retq ; ; UNSAFE-LABEL: oge_inverse: @@ -152,12 +152,12 @@ define double @oge_inverse(double %x, double %y) { define double @ole_inverse(double %x, double %y) { ; STRICT-LABEL: ole_inverse: ; STRICT: # BB#0: -; STRICT-NEXT: movapd %xmm0, %xmm2 +; STRICT-NEXT: movaps %xmm0, %xmm2 ; STRICT-NEXT: cmplesd %xmm1, %xmm2 -; STRICT-NEXT: andpd %xmm2, %xmm1 -; STRICT-NEXT: andnpd %xmm0, %xmm2 -; STRICT-NEXT: orpd %xmm1, %xmm2 -; STRICT-NEXT: movapd %xmm2, %xmm0 +; STRICT-NEXT: andps %xmm2, %xmm1 +; STRICT-NEXT: andnps %xmm0, %xmm2 +; STRICT-NEXT: orps %xmm1, %xmm2 +; STRICT-NEXT: movaps %xmm2, %xmm0 ; STRICT-NEXT: retq ; ; UNSAFE-LABEL: ole_inverse: @@ -257,7 +257,7 @@ define double @oge_x(double %x) { ; STRICT: # BB#0: ; STRICT-NEXT: xorps %xmm1, %xmm1 ; STRICT-NEXT: cmplesd %xmm0, %xmm1 -; STRICT-NEXT: andpd %xmm1, %xmm0 +; STRICT-NEXT: andps %xmm1, %xmm0 ; STRICT-NEXT: retq ; ; RELAX-LABEL: oge_x: @@ -275,10 +275,10 @@ define double @ole_x(double %x) { ; STRICT-LABEL: ole_x: ; STRICT: # BB#0: ; STRICT-NEXT: xorps %xmm2, %xmm2 -; STRICT-NEXT: movapd %xmm0, %xmm1 +; STRICT-NEXT: movaps %xmm0, %xmm1 ; STRICT-NEXT: cmplesd %xmm2, %xmm1 -; STRICT-NEXT: andpd %xmm0, %xmm1 -; STRICT-NEXT: movapd %xmm1, %xmm0 +; STRICT-NEXT: andps %xmm0, %xmm1 +; STRICT-NEXT: movaps %xmm1, %xmm0 ; STRICT-NEXT: retq ; ; RELAX-LABEL: ole_x: @@ -297,8 +297,8 @@ define double @oge_inverse_x(double %x) { ; STRICT: # BB#0: ; STRICT-NEXT: xorps %xmm1, %xmm1 ; STRICT-NEXT: cmplesd %xmm0, %xmm1 -; STRICT-NEXT: andnpd %xmm0, %xmm1 -; STRICT-NEXT: movapd %xmm1, %xmm0 +; STRICT-NEXT: andnps %xmm0, %xmm1 +; STRICT-NEXT: movaps %xmm1, %xmm0 ; STRICT-NEXT: retq ; ; UNSAFE-LABEL: oge_inverse_x: @@ -323,10 +323,10 @@ define double @ole_inverse_x(double %x) { ; STRICT-LABEL: ole_inverse_x: ; STRICT: # BB#0: ; STRICT-NEXT: xorps %xmm2, %xmm2 -; STRICT-NEXT: movapd %xmm0, %xmm1 +; STRICT-NEXT: movaps %xmm0, %xmm1 ; STRICT-NEXT: cmplesd %xmm2, %xmm1 -; STRICT-NEXT: andnpd %xmm0, %xmm1 -; STRICT-NEXT: movapd %xmm1, %xmm0 +; STRICT-NEXT: andnps %xmm0, %xmm1 +; STRICT-NEXT: movaps %xmm1, %xmm0 ; STRICT-NEXT: retq ; ; UNSAFE-LABEL: ole_inverse_x: @@ -350,12 +350,12 @@ define double @ole_inverse_x(double %x) { define double @ugt(double %x, double %y) { ; STRICT-LABEL: ugt: ; STRICT: # BB#0: -; STRICT-NEXT: movapd %xmm0, %xmm2 +; STRICT-NEXT: movaps %xmm0, %xmm2 ; STRICT-NEXT: cmpnlesd %xmm1, %xmm2 -; STRICT-NEXT: andpd %xmm2, %xmm0 -; STRICT-NEXT: andnpd %xmm1, %xmm2 -; STRICT-NEXT: orpd %xmm0, %xmm2 -; STRICT-NEXT: movapd %xmm2, %xmm0 +; STRICT-NEXT: andps %xmm2, %xmm0 +; STRICT-NEXT: andnps %xmm1, %xmm2 +; STRICT-NEXT: orps %xmm0, %xmm2 +; STRICT-NEXT: movaps %xmm2, %xmm0 ; STRICT-NEXT: retq ; ; RELAX-LABEL: ugt: @@ -371,11 +371,11 @@ define double @ugt(double %x, double %y) { define double @ult(double %x, double %y) { ; STRICT-LABEL: ult: ; STRICT: # BB#0: -; STRICT-NEXT: movapd %xmm1, %xmm2 +; STRICT-NEXT: movaps %xmm1, %xmm2 ; STRICT-NEXT: cmpnlesd %xmm0, %xmm2 -; STRICT-NEXT: andpd %xmm2, %xmm0 -; STRICT-NEXT: andnpd %xmm1, %xmm2 -; STRICT-NEXT: orpd %xmm2, %xmm0 +; STRICT-NEXT: andps %xmm2, %xmm0 +; STRICT-NEXT: andnps %xmm1, %xmm2 +; STRICT-NEXT: orps %xmm2, %xmm0 ; STRICT-NEXT: retq ; ; RELAX-LABEL: ult: @@ -391,12 +391,12 @@ define double @ult(double %x, double %y) { define double @ugt_inverse(double %x, double %y) { ; STRICT-LABEL: ugt_inverse: ; STRICT: # BB#0: -; STRICT-NEXT: movapd %xmm0, %xmm2 +; STRICT-NEXT: movaps %xmm0, %xmm2 ; STRICT-NEXT: cmpnlesd %xmm1, %xmm2 -; STRICT-NEXT: andpd %xmm2, %xmm1 -; STRICT-NEXT: andnpd %xmm0, %xmm2 -; STRICT-NEXT: orpd %xmm1, %xmm2 -; STRICT-NEXT: movapd %xmm2, %xmm0 +; STRICT-NEXT: andps %xmm2, %xmm1 +; STRICT-NEXT: andnps %xmm0, %xmm2 +; STRICT-NEXT: orps %xmm1, %xmm2 +; STRICT-NEXT: movaps %xmm2, %xmm0 ; STRICT-NEXT: retq ; ; UNSAFE-LABEL: ugt_inverse: @@ -418,12 +418,12 @@ define double @ugt_inverse(double %x, double %y) { define double @ult_inverse(double %x, double %y) { ; STRICT-LABEL: ult_inverse: ; STRICT: # BB#0: -; STRICT-NEXT: movapd %xmm1, %xmm2 +; STRICT-NEXT: movaps %xmm1, %xmm2 ; STRICT-NEXT: cmpnlesd %xmm0, %xmm2 -; STRICT-NEXT: andpd %xmm2, %xmm1 -; STRICT-NEXT: andnpd %xmm0, %xmm2 -; STRICT-NEXT: orpd %xmm1, %xmm2 -; STRICT-NEXT: movapd %xmm2, %xmm0 +; STRICT-NEXT: andps %xmm2, %xmm1 +; STRICT-NEXT: andnps %xmm0, %xmm2 +; STRICT-NEXT: orps %xmm1, %xmm2 +; STRICT-NEXT: movaps %xmm2, %xmm0 ; STRICT-NEXT: retq ; ; UNSAFE-LABEL: ult_inverse: @@ -524,10 +524,10 @@ define double @ugt_x(double %x) { ; STRICT-LABEL: ugt_x: ; STRICT: # BB#0: ; STRICT-NEXT: xorps %xmm2, %xmm2 -; STRICT-NEXT: movapd %xmm0, %xmm1 +; STRICT-NEXT: movaps %xmm0, %xmm1 ; STRICT-NEXT: cmpnlesd %xmm2, %xmm1 -; STRICT-NEXT: andpd %xmm0, %xmm1 -; STRICT-NEXT: movapd %xmm1, %xmm0 +; STRICT-NEXT: andps %xmm0, %xmm1 +; STRICT-NEXT: movaps %xmm1, %xmm0 ; STRICT-NEXT: retq ; ; RELAX-LABEL: ugt_x: @@ -546,7 +546,7 @@ define double @ult_x(double %x) { ; STRICT: # BB#0: ; STRICT-NEXT: xorps %xmm1, %xmm1 ; STRICT-NEXT: cmpnlesd %xmm0, %xmm1 -; STRICT-NEXT: andpd %xmm1, %xmm0 +; STRICT-NEXT: andps %xmm1, %xmm0 ; STRICT-NEXT: retq ; ; RELAX-LABEL: ult_x: @@ -564,10 +564,10 @@ define double @ugt_inverse_x(double %x) { ; STRICT-LABEL: ugt_inverse_x: ; STRICT: # BB#0: ; STRICT-NEXT: xorps %xmm2, %xmm2 -; STRICT-NEXT: movapd %xmm0, %xmm1 +; STRICT-NEXT: movaps %xmm0, %xmm1 ; STRICT-NEXT: cmpnlesd %xmm2, %xmm1 -; STRICT-NEXT: andnpd %xmm0, %xmm1 -; STRICT-NEXT: movapd %xmm1, %xmm0 +; STRICT-NEXT: andnps %xmm0, %xmm1 +; STRICT-NEXT: movaps %xmm1, %xmm0 ; STRICT-NEXT: retq ; ; UNSAFE-LABEL: ugt_inverse_x: @@ -593,8 +593,8 @@ define double @ult_inverse_x(double %x) { ; STRICT: # BB#0: ; STRICT-NEXT: xorps %xmm1, %xmm1 ; STRICT-NEXT: cmpnlesd %xmm0, %xmm1 -; STRICT-NEXT: andnpd %xmm0, %xmm1 -; STRICT-NEXT: movapd %xmm1, %xmm0 +; STRICT-NEXT: andnps %xmm0, %xmm1 +; STRICT-NEXT: movaps %xmm1, %xmm0 ; STRICT-NEXT: retq ; ; UNSAFE-LABEL: ult_inverse_x: -- 2.50.1