From: Simon Pilgrim Date: Sat, 11 Feb 2017 13:32:55 +0000 (+0000) Subject: [X86][3DNow!] Enable commutation for PFADD/PFMUL/PFCMPEQ/PAVGUSB/PMULHRW X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=0b45ed990b056cadf765d911039519abda70ced9;p=llvm [X86][3DNow!] Enable commutation for PFADD/PFMUL/PFCMPEQ/PAVGUSB/PMULHRW All commutations confirmed to give identical results - note that PFMAX/PFMIN do not. PFSUB<->PFSUBR should be commutable as well. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@294846 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86Instr3DNow.td b/lib/Target/X86/X86Instr3DNow.td index ba1aede3c1a..49ac822b0da 100644 --- a/lib/Target/X86/X86Instr3DNow.td +++ b/lib/Target/X86/X86Instr3DNow.td @@ -38,7 +38,9 @@ multiclass I3DNow_binop_rm opc, string Mn> { def rm : I3DNow_binop; } -multiclass I3DNow_binop_rm_int opc, string Mn, string Ver = ""> { +multiclass I3DNow_binop_rm_int opc, string Mn, bit Commutable = 0, + string Ver = ""> { + let isCommutable = Commutable in def rr : I3DNow_binop( !strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src1, VR64:$src2))]>; @@ -63,16 +65,16 @@ multiclass I3DNow_conv_rm_int opc, string Mn, string Ver = ""> { (bitconvert (load_mmx addr:$src))))]>; } -defm PAVGUSB : I3DNow_binop_rm_int<0xBF, "pavgusb">; +defm PAVGUSB : I3DNow_binop_rm_int<0xBF, "pavgusb", 1>; defm PF2ID : I3DNow_conv_rm_int<0x1D, "pf2id">; defm PFACC : I3DNow_binop_rm_int<0xAE, "pfacc">; -defm PFADD : I3DNow_binop_rm_int<0x9E, "pfadd">; -defm PFCMPEQ : I3DNow_binop_rm_int<0xB0, "pfcmpeq">; +defm PFADD : I3DNow_binop_rm_int<0x9E, "pfadd", 1>; +defm PFCMPEQ : I3DNow_binop_rm_int<0xB0, "pfcmpeq", 1>; defm PFCMPGE : I3DNow_binop_rm_int<0x90, "pfcmpge">; defm PFCMPGT : I3DNow_binop_rm_int<0xA0, "pfcmpgt">; defm PFMAX : I3DNow_binop_rm_int<0xA4, "pfmax">; defm PFMIN : I3DNow_binop_rm_int<0x94, "pfmin">; -defm PFMUL : I3DNow_binop_rm_int<0xB4, "pfmul">; +defm PFMUL : I3DNow_binop_rm_int<0xB4, "pfmul", 1>; defm PFRCP : 
I3DNow_conv_rm_int<0x96, "pfrcp">; defm PFRCPIT1 : I3DNow_binop_rm_int<0xA6, "pfrcpit1">; defm PFRCPIT2 : I3DNow_binop_rm_int<0xB6, "pfrcpit2">; @@ -81,7 +83,7 @@ defm PFRSQRT : I3DNow_conv_rm_int<0x97, "pfrsqrt">; defm PFSUB : I3DNow_binop_rm_int<0x9A, "pfsub">; defm PFSUBR : I3DNow_binop_rm_int<0xAA, "pfsubr">; defm PI2FD : I3DNow_conv_rm_int<0x0D, "pi2fd">; -defm PMULHRW : I3DNow_binop_rm_int<0xB7, "pmulhrw">; +defm PMULHRW : I3DNow_binop_rm_int<0xB7, "pmulhrw", 1>; def FEMMS : I3DNow<0x0E, RawFrm, (outs), (ins), "femms", @@ -98,6 +100,6 @@ def PREFETCHW : I<0x0D, MRM1m, (outs), (ins i8mem:$addr), "prefetchw\t$addr", // "3DNowA" instructions defm PF2IW : I3DNow_conv_rm_int<0x1C, "pf2iw", "a">; defm PI2FW : I3DNow_conv_rm_int<0x0C, "pi2fw", "a">; -defm PFNACC : I3DNow_binop_rm_int<0x8A, "pfnacc", "a">; -defm PFPNACC : I3DNow_binop_rm_int<0x8E, "pfpnacc", "a">; +defm PFNACC : I3DNow_binop_rm_int<0x8A, "pfnacc", 0, "a">; +defm PFPNACC : I3DNow_binop_rm_int<0x8E, "pfpnacc", 0, "a">; defm PSWAPD : I3DNow_conv_rm_int<0xBB, "pswapd", "a">; diff --git a/test/CodeGen/X86/commute-3dnow.ll b/test/CodeGen/X86/commute-3dnow.ll index a121a586988..aefb57bcb16 100644 --- a/test/CodeGen/X86/commute-3dnow.ll +++ b/test/CodeGen/X86/commute-3dnow.ll @@ -2,8 +2,6 @@ ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+mmx,+3dnow | FileCheck %s --check-prefix=X32 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+3dnow | FileCheck %s --check-prefix=X64 -; FIXME - missed commutation opportunities. 
- define void @commute_m_pfadd(x86_mmx *%a0, x86_mmx *%a1, x86_mmx *%a2) nounwind { ; X32-LABEL: commute_m_pfadd: ; X32: # BB#0: @@ -11,19 +9,17 @@ define void @commute_m_pfadd(x86_mmx *%a0, x86_mmx *%a1, x86_mmx *%a2) nounwind ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx ; X32-NEXT: movq (%edx), %mm0 -; X32-NEXT: movq (%ecx), %mm1 ; X32-NEXT: pfadd (%eax), %mm0 -; X32-NEXT: pfadd %mm0, %mm1 -; X32-NEXT: movq %mm1, (%ecx) +; X32-NEXT: pfadd (%ecx), %mm0 +; X32-NEXT: movq %mm0, (%ecx) ; X32-NEXT: retl ; ; X64-LABEL: commute_m_pfadd: ; X64: # BB#0: ; X64-NEXT: movq (%rdi), %mm0 -; X64-NEXT: movq (%rdx), %mm1 ; X64-NEXT: pfadd (%rsi), %mm0 -; X64-NEXT: pfadd %mm0, %mm1 -; X64-NEXT: movq %mm1, (%rdx) +; X64-NEXT: pfadd (%rdx), %mm0 +; X64-NEXT: movq %mm0, (%rdx) ; X64-NEXT: retq %1 = load x86_mmx, x86_mmx* %a0 %2 = load x86_mmx, x86_mmx* %a1 @@ -35,6 +31,7 @@ define void @commute_m_pfadd(x86_mmx *%a0, x86_mmx *%a1, x86_mmx *%a2) nounwind } declare x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx, x86_mmx) +; FIXME - missed PFSUB commutation. define void @commute_m_pfsub(x86_mmx *%a0, x86_mmx *%a1, x86_mmx *%a2) nounwind { ; X32-LABEL: commute_m_pfsub: ; X32: # BB#0: @@ -66,6 +63,7 @@ define void @commute_m_pfsub(x86_mmx *%a0, x86_mmx *%a1, x86_mmx *%a2) nounwind } declare x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx, x86_mmx) +; FIXME - missed PFSUBR commutation. 
define void @commute_m_pfsubr(x86_mmx *%a0, x86_mmx *%a1, x86_mmx *%a2) nounwind { ; X32-LABEL: commute_m_pfsubr: ; X32: # BB#0: @@ -104,19 +102,17 @@ define void @commute_m_pfmul(x86_mmx *%a0, x86_mmx *%a1, x86_mmx *%a2) nounwind ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx ; X32-NEXT: movq (%edx), %mm0 -; X32-NEXT: movq (%ecx), %mm1 ; X32-NEXT: pfmul (%eax), %mm0 -; X32-NEXT: pfmul %mm0, %mm1 -; X32-NEXT: movq %mm1, (%ecx) +; X32-NEXT: pfmul (%ecx), %mm0 +; X32-NEXT: movq %mm0, (%ecx) ; X32-NEXT: retl ; ; X64-LABEL: commute_m_pfmul: ; X64: # BB#0: ; X64-NEXT: movq (%rdi), %mm0 -; X64-NEXT: movq (%rdx), %mm1 ; X64-NEXT: pfmul (%rsi), %mm0 -; X64-NEXT: pfmul %mm0, %mm1 -; X64-NEXT: movq %mm1, (%rdx) +; X64-NEXT: pfmul (%rdx), %mm0 +; X64-NEXT: movq %mm0, (%rdx) ; X64-NEXT: retq %1 = load x86_mmx, x86_mmx* %a0 %2 = load x86_mmx, x86_mmx* %a1 @@ -135,19 +131,17 @@ define void @commute_m_pfcmpeq(x86_mmx *%a0, x86_mmx *%a1, x86_mmx *%a2) nounwin ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx ; X32-NEXT: movq (%edx), %mm0 -; X32-NEXT: movq (%ecx), %mm1 ; X32-NEXT: pfcmpeq (%eax), %mm0 -; X32-NEXT: pfcmpeq %mm0, %mm1 -; X32-NEXT: movq %mm1, (%ecx) +; X32-NEXT: pfcmpeq (%ecx), %mm0 +; X32-NEXT: movq %mm0, (%ecx) ; X32-NEXT: retl ; ; X64-LABEL: commute_m_pfcmpeq: ; X64: # BB#0: ; X64-NEXT: movq (%rdi), %mm0 -; X64-NEXT: movq (%rdx), %mm1 ; X64-NEXT: pfcmpeq (%rsi), %mm0 -; X64-NEXT: pfcmpeq %mm0, %mm1 -; X64-NEXT: movq %mm1, (%rdx) +; X64-NEXT: pfcmpeq (%rdx), %mm0 +; X64-NEXT: movq %mm0, (%rdx) ; X64-NEXT: retq %1 = load x86_mmx, x86_mmx* %a0 %2 = load x86_mmx, x86_mmx* %a1 @@ -166,19 +160,17 @@ define void @commute_m_pavgusb(x86_mmx *%a0, x86_mmx *%a1, x86_mmx *%a2) nounwin ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx ; X32-NEXT: movq (%edx), %mm0 -; X32-NEXT: movq (%ecx), %mm1 ; X32-NEXT: pavgusb (%eax), %mm0 -; X32-NEXT: pavgusb %mm0, %mm1 -; X32-NEXT: movq %mm1, 
(%ecx) +; X32-NEXT: pavgusb (%ecx), %mm0 +; X32-NEXT: movq %mm0, (%ecx) ; X32-NEXT: retl ; ; X64-LABEL: commute_m_pavgusb: ; X64: # BB#0: ; X64-NEXT: movq (%rdi), %mm0 -; X64-NEXT: movq (%rdx), %mm1 ; X64-NEXT: pavgusb (%rsi), %mm0 -; X64-NEXT: pavgusb %mm0, %mm1 -; X64-NEXT: movq %mm1, (%rdx) +; X64-NEXT: pavgusb (%rdx), %mm0 +; X64-NEXT: movq %mm0, (%rdx) ; X64-NEXT: retq %1 = load x86_mmx, x86_mmx* %a0 %2 = load x86_mmx, x86_mmx* %a1 @@ -197,19 +189,17 @@ define void @commute_m_pmulhrw(x86_mmx *%a0, x86_mmx *%a1, x86_mmx *%a2) nounwin ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx ; X32-NEXT: movq (%edx), %mm0 -; X32-NEXT: movq (%ecx), %mm1 ; X32-NEXT: pmulhrw (%eax), %mm0 -; X32-NEXT: pmulhrw %mm0, %mm1 -; X32-NEXT: movq %mm1, (%ecx) +; X32-NEXT: pmulhrw (%ecx), %mm0 +; X32-NEXT: movq %mm0, (%ecx) ; X32-NEXT: retl ; ; X64-LABEL: commute_m_pmulhrw: ; X64: # BB#0: ; X64-NEXT: movq (%rdi), %mm0 -; X64-NEXT: movq (%rdx), %mm1 ; X64-NEXT: pmulhrw (%rsi), %mm0 -; X64-NEXT: pmulhrw %mm0, %mm1 -; X64-NEXT: movq %mm1, (%rdx) +; X64-NEXT: pmulhrw (%rdx), %mm0 +; X64-NEXT: movq %mm0, (%rdx) ; X64-NEXT: retq %1 = load x86_mmx, x86_mmx* %a0 %2 = load x86_mmx, x86_mmx* %a1