2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree.
12 %include "vpx_ports/x86_abi_support.asm"
; vp8_sad16x16_wmt
; Judging by the name this is a 16x16 sum-of-absolute-differences kernel;
; the accumulate/compare instructions are not part of this excerpt, so only
; the argument setup and the loads are described here.
; Arguments are read through the arg(n) macro (x86_abi_support.asm):
;   arg(0) src_ptr, arg(1) src_stride, arg(2) ref_ptr, arg(3) ref_stride
; Register roles in the visible code:
;   rsi = src_ptr, rdi = ref_ptr, rax = src_stride, rdx = ref_stride
16 ;unsigned int vp8_sad16x16_wmt(
17 ; unsigned char *src_ptr,
; int src_stride,   (read below as a sign-extended 32-bit value)
19 ; unsigned char *ref_ptr,
; int ref_stride)   (read below as a sign-extended 32-bit value)
21 global sym(vp8_sad16x16_wmt)
22 sym(vp8_sad16x16_wmt):
; Macro from x86_abi_support.asm; presumably makes the 4 arguments
; addressable through arg(n) on register-passing ABIs -- TODO confirm.
25 SHADOW_ARGS_TO_STACK 4
30 mov rsi, arg(0) ;src_ptr
31 mov rdi, arg(2) ;ref_ptr
; Strides are widened from 32 to 64 bits so they can be used directly in
; the [base+index] address forms below.
33 movsxd rax, dword ptr arg(1) ;src_stride
34 movsxd rdx, dword ptr arg(3) ;ref_stride
; Each movq loads 8 bytes into the low half of an XMM register, so one
; 16-byte row is fetched as two halves (offsets +0 and +8).
43 movq xmm0, QWORD PTR [rsi] ; source, current row, bytes 0-7
44 movq xmm2, QWORD PTR [rsi+8] ; source, current row, bytes 8-15
46 movq xmm1, QWORD PTR [rdi] ; reference, current row, bytes 0-7
47 movq xmm3, QWORD PTR [rdi+8] ; reference, current row, bytes 8-15
49 movq xmm4, QWORD PTR [rsi+rax] ; source, next row, bytes 0-7
50 movq xmm5, QWORD PTR [rdi+rdx] ; reference, next row, bytes 0-7
; NOTE(review): xmm6 is callee-saved under the Microsoft x64 ABI; no
; save/restore is visible in this excerpt -- confirm one exists.
57 movq xmm6, QWORD PTR [rsi+rax+8] ; source, next row, bytes 8-15
; xmm3 is loaded a second time here; its earlier contents are presumably
; consumed by instructions not shown between these loads -- confirm.
59 movq xmm3, QWORD PTR [rdi+rdx+8] ; reference, next row, bytes 8-15
; Loops while the preceding flag-setting instruction (not shown) reports
; not-equal. The x16x16sad_wmt_loop label itself is not in this excerpt.
72 jne x16x16sad_wmt_loop
; vp8_sad8x16_wmt
; Judging by the name this is an 8x16 sum-of-absolute-differences kernel;
; the accumulate/compare instructions are not part of this excerpt.
; Register roles in the visible code:
;   rsi = src_ptr, rdi = ref_ptr, rbx = src_stride, rdx = ref_stride
; NOTE(review): five arguments are shadowed but only arg(0)..arg(3) are read
; in the visible lines; the fifth presumably feeds the early-exit compare --
; confirm against the full file.
87 ;unsigned int vp8_sad8x16_wmt(
88 ; unsigned char *src_ptr,
; int src_stride,   (read below as a sign-extended 32-bit value)
90 ; unsigned char *ref_ptr,
; int ref_stride,   (read below as a sign-extended 32-bit value)
; ...               (fifth argument not visible in this excerpt)
93 global sym(vp8_sad8x16_wmt)
; NOTE(review): the sym(vp8_sad8x16_wmt): entry label is not visible in this
; excerpt -- confirm it is present in the full file.
; Macro from x86_abi_support.asm; presumably makes the 5 arguments
; addressable through arg(n) on register-passing ABIs -- TODO confirm.
97 SHADOW_ARGS_TO_STACK 5
103 mov rsi, arg(0) ;src_ptr
104 mov rdi, arg(2) ;ref_ptr
; NOTE(review): rbx is callee-saved in both the System V and Microsoft x64
; ABIs; its save/restore is not visible in this excerpt -- confirm.
106 movsxd rbx, dword ptr arg(1) ;src_stride
107 movsxd rdx, dword ptr arg(3) ;ref_stride
; Leaves the loop when the preceding compare (not shown) is signed-greater.
118 jg x8x16sad_wmt_early_exit
; Each movq loads one 8-byte row into an MMX register.
120 movq mm0, QWORD PTR [rsi] ; source, current row
121 movq mm1, QWORD PTR [rdi] ; reference, current row
123 movq mm2, QWORD PTR [rsi+rbx] ; source, next row
124 movq mm3, QWORD PTR [rdi+rdx] ; reference, next row
; Loops while the preceding flag-setting instruction (not shown) reports
; not-equal. The x8x16sad_wmt_loop label itself is not in this excerpt.
136 jne x8x16sad_wmt_loop
; Target of the early-exit branch above; normal loop termination falls
; through to the same point.
140 x8x16sad_wmt_early_exit:
; vp8_sad8x8_wmt
; Judging by the name this is an 8x8 sum-of-absolute-differences kernel;
; the accumulate/compare instructions are not part of this excerpt.
; Register roles in the visible code:
;   rsi = src_ptr, rdi = ref_ptr, rbx = src_stride, rdx = ref_stride
; NOTE(review): five arguments are shadowed but only arg(0)..arg(3) are read
; in the visible lines; the fifth presumably feeds the early-exit compare --
; confirm against the full file.
151 ;unsigned int vp8_sad8x8_wmt(
152 ; unsigned char *src_ptr,
; int src_stride,   (read below as a sign-extended 32-bit value)
154 ; unsigned char *ref_ptr,
; int ref_stride,   (read below as a sign-extended 32-bit value)
; ...               (fifth argument not visible in this excerpt)
156 global sym(vp8_sad8x8_wmt)
; NOTE(review): the sym(vp8_sad8x8_wmt): entry label is not visible in this
; excerpt -- confirm it is present in the full file.
; Macro from x86_abi_support.asm; presumably makes the 5 arguments
; addressable through arg(n) on register-passing ABIs -- TODO confirm.
160 SHADOW_ARGS_TO_STACK 5
166 mov rsi, arg(0) ;src_ptr
167 mov rdi, arg(2) ;ref_ptr
; NOTE(review): rbx is callee-saved in both the System V and Microsoft x64
; ABIs; its save/restore is not visible in this excerpt -- confirm.
169 movsxd rbx, dword ptr arg(1) ;src_stride
170 movsxd rdx, dword ptr arg(3) ;ref_stride
; Leaves the loop when the preceding compare (not shown) is signed-greater.
179 jg x8x8sad_wmt_early_exit
; Each movq loads one 8-byte row into an MMX register.
181 movq mm0, QWORD PTR [rsi] ; source, current row
182 movq mm1, QWORD PTR [rdi] ; reference, current row
; Target of the early-exit branch above.
194 x8x8sad_wmt_early_exit:
; vp8_sad4x4_wmt
; Judging by the name this is a 4x4 sum-of-absolute-differences kernel; the
; accumulate instructions are not part of this excerpt, so only the argument
; setup and the loads are described here.
; Register roles in the visible code:
;   rsi = src_ptr, rdi = ref_ptr, rax = src_stride, rdx = ref_stride
204 ;unsigned int vp8_sad4x4_wmt(
205 ; unsigned char *src_ptr,
; int src_stride,   (read below as a sign-extended 32-bit value)
207 ; unsigned char *ref_ptr,
; int ref_stride)   (read below as a sign-extended 32-bit value)
209 global sym(vp8_sad4x4_wmt)
; NOTE(review): the sym(vp8_sad4x4_wmt): entry label is not visible in this
; excerpt -- confirm it is present in the full file.
; Macro from x86_abi_support.asm; presumably makes the 4 arguments
; addressable through arg(n) on register-passing ABIs -- TODO confirm.
213 SHADOW_ARGS_TO_STACK 4
218 mov rsi, arg(0) ;src_ptr
219 mov rdi, arg(2) ;ref_ptr
; Strides are widened from 32 to 64 bits so they can be used directly in
; the [base+index] address forms below.
221 movsxd rax, dword ptr arg(1) ;src_stride
222 movsxd rdx, dword ptr arg(3) ;ref_stride
; movd is the 32-bit move, so the memory operand is sized DWORD: exactly
; 4 bytes are read per load. A QWORD override on movd can make a 64-bit
; assembler pick the 8-byte (REX.W) form and read past the 4 bytes wanted.
224 movd mm0, DWORD PTR [rsi] ; source, current row (4 bytes)
225 movd mm1, DWORD PTR [rdi] ; reference, current row (4 bytes)
227 movd mm2, DWORD PTR [rsi+rax] ; source, next row (4 bytes)
228 movd mm3, DWORD PTR [rdi+rdx] ; reference, next row (4 bytes)
; Second group of loads uses the same address forms; rsi/rdi are presumably
; advanced by instructions not shown between the two groups -- confirm.
237 movd mm4, DWORD PTR [rsi] ; source, current row (4 bytes)
239 movd mm5, DWORD PTR [rdi] ; reference, current row (4 bytes)
240 movd mm6, DWORD PTR [rsi+rax] ; source, next row (4 bytes)
242 movd mm7, DWORD PTR [rdi+rdx] ; reference, next row (4 bytes)
; vp8_sad16x8_wmt
; Judging by the name this is a 16x8 sum-of-absolute-differences kernel;
; the accumulate/compare instructions are not part of this excerpt.
; Register roles in the visible code:
;   rsi = src_ptr, rdi = ref_ptr, rbx = src_stride, rdx = ref_stride
; NOTE(review): five arguments are shadowed but only arg(0)..arg(3) are read
; in the visible lines; the fifth presumably feeds the early-exit compare --
; confirm against the full file.
259 ;unsigned int vp8_sad16x8_wmt(
260 ; unsigned char *src_ptr,
; int src_stride,   (read below as a sign-extended 32-bit value)
262 ; unsigned char *ref_ptr,
; int ref_stride,   (read below as a sign-extended 32-bit value)
; ...               (fifth argument not visible in this excerpt)
264 global sym(vp8_sad16x8_wmt)
265 sym(vp8_sad16x8_wmt):
; Macro from x86_abi_support.asm; presumably makes the 5 arguments
; addressable through arg(n) on register-passing ABIs -- TODO confirm.
268 SHADOW_ARGS_TO_STACK 5
275 mov rsi, arg(0) ;src_ptr
276 mov rdi, arg(2) ;ref_ptr
; NOTE(review): rbx is callee-saved in both the System V and Microsoft x64
; ABIs; its save/restore is not visible in this excerpt -- confirm.
278 movsxd rbx, dword ptr arg(1) ;src_stride
279 movsxd rdx, dword ptr arg(3) ;ref_stride
; Leaves the loop when the preceding compare (not shown) is signed-greater.
288 jg x16x8sad_wmt_early_exit
; Each movq loads 8 bytes into an MMX register, so one 16-byte row is
; fetched as two halves (offsets +0 and +8).
290 movq mm0, QWORD PTR [rsi] ; source, current row, bytes 0-7
291 movq mm2, QWORD PTR [rsi+8] ; source, current row, bytes 8-15
293 movq mm1, QWORD PTR [rdi] ; reference, current row, bytes 0-7
294 movq mm3, QWORD PTR [rdi+8] ; reference, current row, bytes 8-15
296 movq mm4, QWORD PTR [rsi+rbx] ; source, next row, bytes 0-7
297 movq mm5, QWORD PTR [rdi+rdx] ; reference, next row, bytes 0-7
; mm1 and mm3 are loaded a second time here; their earlier contents are
; presumably consumed by instructions not shown between the loads -- confirm.
302 movq mm1, QWORD PTR [rsi+rbx+8] ; source, next row, bytes 8-15
303 movq mm3, QWORD PTR [rdi+rdx+8] ; reference, next row, bytes 8-15
; Loops while the preceding flag-setting instruction (not shown) reports
; not-equal. The x16x8sad_wmt_loop label itself is not in this excerpt.
318 jne x16x8sad_wmt_loop
; Target of the early-exit branch above; the code that follows this label
; lies beyond the end of this excerpt.
322 x16x8sad_wmt_early_exit: