2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree.
11 %define private_prefix vp9
13 %include "third_party/x86inc/x86inc.asm"
14 %include "vpx_dsp/x86/bitdepth_conversion_sse2.asm"
18 ; int64_t vp9_block_error(int16_t *coeff, int16_t *dqcoeff, intptr_t block_size,
; int64_t *ssz)
; block_error entry point declared through x86inc's cglobal; the file sets
; private_prefix to vp9. Named arguments: uqc, dqc, size, ssz.
; NOTE(review): in x86inc's cglobal the numeric fields are normally
; (args loaded, GPRs used, XMM registers used) = (3, 3, 8) -- confirm.
; NOTE(review): this listing has gaps; the loop label, the instructions that
; consume the loaded registers, and the return sequence are not shown here.
22 cglobal block_error, 3, 3, 8, uqc, dqc, size, ssz
23 pxor m4, m4 ; m4 = 0: sse accumulator
24 pxor m6, m6 ; m6 = 0: ssz accumulator
25 pxor m5, m5 ; m5 = 0: dedicated zero register
; LOAD_TRAN_LOW <n>, <ptr>, <offset> comes from the included
; bitdepth_conversion_sse2.asm; presumably it fills m<n> with int16 values
; read starting at element <offset> of <ptr> -- TODO confirm against the macro.
; Offsets 0 and 8 plus the 16-element increment below suggest 8 elements per
; load, i.e. 16 coefficients from each buffer per pass.
27 LOAD_TRAN_LOW 2, uqcq, 0 ; m2 <- uqc, offset 0
28 LOAD_TRAN_LOW 0, dqcq, 0 ; m0 <- dqc, offset 0
29 LOAD_TRAN_LOW 3, uqcq, 8 ; m3 <- uqc, offset 8
30 LOAD_TRAN_LOW 1, dqcq, 8 ; m1 <- dqc, offset 8
; Step both pointers past the 16 elements just read (macro semantics
; assumed from its name and arguments -- confirm).
31 INCREMENT_ELEMENTS_TRAN_LOW uqcq, 16
32 INCREMENT_ELEMENTS_TRAN_LOW dqcq, 16
; Overflow reasoning for the squaring and adding steps; the instructions these
; notes describe are not visible in this listing.
36 ; individual errors are max. 15bit+sign, so squares are 30bit, and
37 ; thus the sum of 2 should fit in a 31bit integer (+ unused sign bit)
42 ; the sum of 2 31bit integers will fit in a 32bit unsigned integer
; Final reduction step (instructions not visible in this listing).
56 ; accumulate horizontally and store in return value
73 ; Compute the sum of squared differences between two tran_low_t vectors.
74 ; Vectors are converted (if necessary) to int16_t for calculations.
75 ; int64_t vp9_block_error_fp(tran_low_t *coeff, tran_low_t *dqcoeff,
76 ; intptr_t block_size)
; block_error_fp entry point declared through x86inc's cglobal; the file sets
; private_prefix to vp9. Named arguments: uqc, dqc, size (no ssz output here).
; NOTE(review): in x86inc's cglobal the numeric fields are normally
; (args loaded, GPRs used, XMM registers used) = (3, 3, 6) -- confirm.
; NOTE(review): this listing has gaps; the loop label, the instructions that
; consume the loaded registers, and the return sequence are not shown here.
79 cglobal block_error_fp, 3, 3, 6, uqc, dqc, size
80 pxor m4, m4 ; m4 = 0: sse accumulator
81 pxor m5, m5 ; m5 = 0: dedicated zero register
; LOAD_TRAN_LOW <n>, <ptr>, <offset> comes from the included
; bitdepth_conversion_sse2.asm; presumably it fills m<n> with int16 values
; read starting at element <offset> of <ptr> -- TODO confirm against the macro.
; Offsets 0 and 8 plus the 16-element increment below suggest 8 elements per
; load, i.e. 16 coefficients from each buffer per pass.
83 LOAD_TRAN_LOW 2, uqcq, 0 ; m2 <- uqc, offset 0
84 LOAD_TRAN_LOW 0, dqcq, 0 ; m0 <- dqc, offset 0
85 LOAD_TRAN_LOW 3, uqcq, 8 ; m3 <- uqc, offset 8
86 LOAD_TRAN_LOW 1, dqcq, 8 ; m1 <- dqc, offset 8
; Step both pointers past the 16 elements just read (macro semantics
; assumed from its name and arguments -- confirm).
87 INCREMENT_ELEMENTS_TRAN_LOW uqcq, 16
88 INCREMENT_ELEMENTS_TRAN_LOW dqcq, 16
; Overflow reasoning for the squaring and adding steps; the instructions these
; notes describe are not visible in this listing.
92 ; individual errors are max. 15bit+sign, so squares are 30bit, and
93 ; thus the sum of 2 should fit in a 31bit integer (+ unused sign bit)
96 ; the sum of 2 31bit integers will fit in a 32bit unsigned integer
; Final reduction step (instructions not visible in this listing).
105 ; accumulate horizontally and store in return value