int_x86_mmx_psrl_q, int_x86_mmx_psrli_q,
MMX_SHIFT_ITINS>;
-def : Pat<(int_x86_mmx_psrl_w VR64:$src1, (load_mvmmx addr:$src2)),
- (MMX_PSRLWrm VR64:$src1, addr:$src2)>;
-def : Pat<(int_x86_mmx_psrl_d VR64:$src1, (load_mvmmx addr:$src2)),
- (MMX_PSRLDrm VR64:$src1, addr:$src2)>;
-def : Pat<(int_x86_mmx_psrl_q VR64:$src1, (load_mvmmx addr:$src2)),
- (MMX_PSRLQrm VR64:$src1, addr:$src2)>;
-
defm MMX_PSLLW : MMXI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw",
int_x86_mmx_psll_w, int_x86_mmx_pslli_w,
MMX_SHIFT_ITINS>;
defm MMX_PSLLQ : MMXI_binop_rmi_int<0xF3, 0x73, MRM6r, "psllq",
                                    int_x86_mmx_psll_q, int_x86_mmx_pslli_q,
                                    MMX_SHIFT_ITINS>;
-def : Pat<(int_x86_mmx_psll_w VR64:$src1, (load_mvmmx addr:$src2)),
- (MMX_PSLLWrm VR64:$src1, addr:$src2)>;
-def : Pat<(int_x86_mmx_psll_d VR64:$src1, (load_mvmmx addr:$src2)),
- (MMX_PSLLDrm VR64:$src1, addr:$src2)>;
-def : Pat<(int_x86_mmx_psll_q VR64:$src1, (load_mvmmx addr:$src2)),
- (MMX_PSLLQrm VR64:$src1, addr:$src2)>;
-
defm MMX_PSRAW : MMXI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw",
int_x86_mmx_psra_w, int_x86_mmx_psrai_w,
MMX_SHIFT_ITINS>;
defm MMX_PSRAD : MMXI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad",
                                    int_x86_mmx_psra_d, int_x86_mmx_psrai_d,
                                    MMX_SHIFT_ITINS>;
-def : Pat<(int_x86_mmx_psra_w VR64:$src1, (load_mvmmx addr:$src2)),
- (MMX_PSRAWrm VR64:$src1, addr:$src2)>;
-def : Pat<(int_x86_mmx_psra_d VR64:$src1, (load_mvmmx addr:$src2)),
- (MMX_PSRADrm VR64:$src1, addr:$src2)>;
-
// Comparison Instructions
defm MMX_PCMPEQB : MMXI_binop_rm_int<0x74, "pcmpeqb", int_x86_mmx_pcmpeq_b,
MMX_INTALU_ITINS>;
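
Note: the def : Pat records removed above folded a 64-bit load directly into the shift-amount operand of the MMX shift-by-register instructions. That is unsafe because the amount usually exists in memory only as an i32 (or as an i32 spilled to a stack slot, as in the volatile-shift-amount test near the end of this diff), so a 64-bit memory read would pick up 32 undefined bits. The IR below is an illustrative sketch, not part of this patch; the function name and the choice of intrinsic are hypothetical. With the folding patterns gone, the updated CHECK lines that follow expect the amount to be loaded into an MMX register with movd before the shift.

; Illustrative only: a caller whose shift amount is a 32-bit value in memory.
define i64 @shift_by_i32_in_memory(<1 x i64>* %v, i32* %amt) nounwind {
entry:
  %p = bitcast <1 x i64>* %v to x86_mmx*
  %x = load x86_mmx, x86_mmx* %p, align 8
  ; Only the low 32 bits of the amount are defined in memory. Widening it in
  ; IR (or via movd in the selected code) keeps the high bits defined, whereas
  ; folding a 64-bit load of this slot would read 32 bits of garbage.
  %a = load i32, i32* %amt, align 4
  %a64 = zext i32 %a to i64
  %amx = bitcast i64 %a64 to x86_mmx
  %s = tail call x86_mmx @llvm.x86.mmx.psll.q(x86_mmx %x, x86_mmx %amx)
  %r = bitcast x86_mmx %s to i64
  ret i64 %r
}
declare x86_mmx @llvm.x86.mmx.psll.q(x86_mmx, x86_mmx)
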
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $8, %esp
-; X86-NEXT: movq 8(%ebp), %mm0
-; X86-NEXT: psllq 16(%ebp), %mm0
-; X86-NEXT: movq %mm0, (%esp)
+; X86-NEXT: movd 16(%ebp), %mm0
+; X86-NEXT: movq 8(%ebp), %mm1
+; X86-NEXT: psllq %mm0, %mm1
+; X86-NEXT: movq %mm1, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $8, %esp
-; X86-NEXT: movd 20(%ebp), %mm0
-; X86-NEXT: psllq 16(%ebp), %mm0
-; X86-NEXT: por 8(%ebp), %mm0
-; X86-NEXT: movq %mm0, (%esp)
+; X86-NEXT: movd 16(%ebp), %mm0
+; X86-NEXT: movd 20(%ebp), %mm1
+; X86-NEXT: psllq %mm0, %mm1
+; X86-NEXT: por 8(%ebp), %mm1
+; X86-NEXT: movq %mm1, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: movl 12(%ebp), %eax
; X86-NEXT: movl 8(%ebp), %ecx
; X86-NEXT: movq (%ecx), %mm0
-; X86-NEXT: psllq (%eax), %mm0
+; X86-NEXT: movd (%eax), %mm1
+; X86-NEXT: psllq %mm1, %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X64-LABEL: t3:
; X64: # BB#0: # %entry
; X64-NEXT: movq (%rdi), %mm0
-; X64-NEXT: psllq (%rsi), %mm0
+; X64-NEXT: movd (%rsi), %mm1
+; X64-NEXT: psllq %mm1, %mm0
; X64-NEXT: movd %mm0, %rax
; X64-NEXT: retq
entry:
; X86-NEXT: movl 12(%ebp), %eax
; X86-NEXT: movl 8(%ebp), %ecx
; X86-NEXT: movq (%ecx), %mm0
-; X86-NEXT: psllq (%eax), %mm0
+; X86-NEXT: movd (%eax), %mm1
+; X86-NEXT: psllq %mm1, %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X64-LABEL: t0:
; X64: # BB#0: # %entry
; X64-NEXT: movq (%rdi), %mm0
-; X64-NEXT: psllq (%rsi), %mm0
+; X64-NEXT: movd (%rsi), %mm1
+; X64-NEXT: psllq %mm1, %mm0
; X64-NEXT: movd %mm0, %rax
; X64-NEXT: retq
entry:
; X86-NEXT: movl 12(%ebp), %eax
; X86-NEXT: movl 8(%ebp), %ecx
; X86-NEXT: movq (%ecx), %mm0
-; X86-NEXT: psrlq (%eax), %mm0
+; X86-NEXT: movd (%eax), %mm1
+; X86-NEXT: psrlq %mm1, %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X64-LABEL: t1:
; X64: # BB#0: # %entry
; X64-NEXT: movq (%rdi), %mm0
-; X64-NEXT: psrlq (%rsi), %mm0
+; X64-NEXT: movd (%rsi), %mm1
+; X64-NEXT: psrlq %mm1, %mm0
; X64-NEXT: movd %mm0, %rax
; X64-NEXT: retq
entry:
; X86-NEXT: movl 12(%ebp), %eax
; X86-NEXT: movl 8(%ebp), %ecx
; X86-NEXT: movq (%ecx), %mm0
-; X86-NEXT: psllw (%eax), %mm0
+; X86-NEXT: movd (%eax), %mm1
+; X86-NEXT: psllw %mm1, %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X64-LABEL: t2:
; X64: # BB#0: # %entry
; X64-NEXT: movq (%rdi), %mm0
-; X64-NEXT: psllw (%rsi), %mm0
+; X64-NEXT: movd (%rsi), %mm1
+; X64-NEXT: psllw %mm1, %mm0
; X64-NEXT: movd %mm0, %rax
; X64-NEXT: retq
entry:
; X86-NEXT: movl 12(%ebp), %eax
; X86-NEXT: movl 8(%ebp), %ecx
; X86-NEXT: movq (%ecx), %mm0
-; X86-NEXT: psrlw (%eax), %mm0
+; X86-NEXT: movd (%eax), %mm1
+; X86-NEXT: psrlw %mm1, %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X64-LABEL: t3:
; X64: # BB#0: # %entry
; X64-NEXT: movq (%rdi), %mm0
-; X64-NEXT: psrlw (%rsi), %mm0
+; X64-NEXT: movd (%rsi), %mm1
+; X64-NEXT: psrlw %mm1, %mm0
; X64-NEXT: movd %mm0, %rax
; X64-NEXT: retq
entry:
; X86-NEXT: movl 12(%ebp), %eax
; X86-NEXT: movl 8(%ebp), %ecx
; X86-NEXT: movq (%ecx), %mm0
-; X86-NEXT: pslld (%eax), %mm0
+; X86-NEXT: movd (%eax), %mm1
+; X86-NEXT: pslld %mm1, %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X64-LABEL: t4:
; X64: # BB#0: # %entry
; X64-NEXT: movq (%rdi), %mm0
-; X64-NEXT: pslld (%rsi), %mm0
+; X64-NEXT: movd (%rsi), %mm1
+; X64-NEXT: pslld %mm1, %mm0
; X64-NEXT: movd %mm0, %rax
; X64-NEXT: retq
entry:
; X86-NEXT: movl 12(%ebp), %eax
; X86-NEXT: movl 8(%ebp), %ecx
; X86-NEXT: movq (%ecx), %mm0
-; X86-NEXT: psrld (%eax), %mm0
+; X86-NEXT: movd (%eax), %mm1
+; X86-NEXT: psrld %mm1, %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X64-LABEL: t5:
; X64: # BB#0: # %entry
; X64-NEXT: movq (%rdi), %mm0
-; X64-NEXT: psrld (%rsi), %mm0
+; X64-NEXT: movd (%rsi), %mm1
+; X64-NEXT: psrld %mm1, %mm0
; X64-NEXT: movd %mm0, %rax
; X64-NEXT: retq
entry:
; X86-NEXT: movl 12(%ebp), %eax
; X86-NEXT: movl 8(%ebp), %ecx
; X86-NEXT: movq (%ecx), %mm0
-; X86-NEXT: psraw (%eax), %mm0
+; X86-NEXT: movd (%eax), %mm1
+; X86-NEXT: psraw %mm1, %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X64-LABEL: t6:
; X64: # BB#0: # %entry
; X64-NEXT: movq (%rdi), %mm0
-; X64-NEXT: psraw (%rsi), %mm0
+; X64-NEXT: movd (%rsi), %mm1
+; X64-NEXT: psraw %mm1, %mm0
; X64-NEXT: movd %mm0, %rax
; X64-NEXT: retq
entry:
; X86-NEXT: movl 12(%ebp), %eax
; X86-NEXT: movl 8(%ebp), %ecx
; X86-NEXT: movq (%ecx), %mm0
-; X86-NEXT: psrad (%eax), %mm0
+; X86-NEXT: movd (%eax), %mm1
+; X86-NEXT: psrad %mm1, %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X64-LABEL: t7:
; X64: # BB#0: # %entry
; X64-NEXT: movq (%rdi), %mm0
-; X64-NEXT: psrad (%rsi), %mm0
+; X64-NEXT: movd (%rsi), %mm1
+; X64-NEXT: psrad %mm1, %mm0
; X64-NEXT: movd %mm0, %rax
; X64-NEXT: retq
entry:
}
declare x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx, x86_mmx)
-; FIXME: Show issue with storing i32 to stack and then reloading as x86_mmx
-; which will lead to garbage in the other 32-bits.
define void @test_psrlq_by_volatile_shift_amount(x86_mmx* %t) nounwind {
; X86-LABEL: test_psrlq_by_volatile_shift_amount:
; X86: # BB#0: # %entry
; X86-NEXT: subl $16, %esp
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl $1, {{[0-9]+}}(%esp)
+; X86-NEXT: movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $255, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: psrlq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: movq %mm0, (%eax)
+; X86-NEXT: movq {{[0-9]+}}(%esp), %mm1
+; X86-NEXT: psrlq %mm0, %mm1
+; X86-NEXT: movq %mm1, (%eax)
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
; X64-LABEL: test_psrlq_by_volatile_shift_amount:
; X64: # BB#0: # %entry
; X64-NEXT: movl $1, -{{[0-9]+}}(%rsp)
+; X64-NEXT: movd -{{[0-9]+}}(%rsp), %mm0
; X64-NEXT: movl $255, %eax
-; X64-NEXT: movd %rax, %mm0
-; X64-NEXT: psrlq -{{[0-9]+}}(%rsp), %mm0
-; X64-NEXT: movq %mm0, (%rdi)
+; X64-NEXT: movd %rax, %mm1
+; X64-NEXT: psrlq %mm0, %mm1
+; X64-NEXT: movq %mm1, (%rdi)
; X64-NEXT: retq
entry:
%0 = alloca i32, align 4