# vp10_common_forward_decls: the body shown here is C text — three #include
# lines and forward declarations of three structs. The sub's name is passed to
# forward_decls on the last line of this block.
# NOTE(review): the statement that emits the C text and the sub's closing
# brace are not visible in this view — confirm against the full file.
1 sub vp10_common_forward_decls() {
7 #include "vpx/vpx_integer.h"
8 #include "vp10/common/common.h"
9 #include "vp10/common/enums.h"
13 /* Encoder forward decls */
15 struct vp9_variance_vtable;
16 struct search_site_config;
19 struct yv12_buffer_config;
22 forward_decls qw/vp10_common_forward_decls/;
24 # x86inc.asm had specific constraints, so its flags are broken out here to make them easy to disable.
25 # All of the variables are zeroed first to avoid tricky else conditions.
26 $mmx_x86inc = $sse_x86inc = $sse2_x86inc = $ssse3_x86inc = $avx_x86inc =
28 $mmx_x86_64_x86inc = $sse_x86_64_x86inc = $sse2_x86_64_x86inc =
29 $ssse3_x86_64_x86inc = $avx_x86_64_x86inc = $avx2_x86_64_x86inc = '';
# The *_x86inc names receive their ISA string only when CONFIG_USE_X86INC is "yes".
30 if (vpx_config("CONFIG_USE_X86INC") eq "yes") {
33 $sse2_x86inc = 'sse2';
34 $ssse3_x86inc = 'ssse3';
36 $avx2_x86inc = 'avx2';
# The *_x86_64_x86inc names are set when $opts{arch} is "x86_64".
# NOTE(review): this check looks nested inside the CONFIG_USE_X86INC check, but
# the closing braces are not visible in this view — confirm.
37 if ($opts{arch} eq "x86_64") {
38 $mmx_x86_64_x86inc = 'mmx';
39 $sse_x86_64_x86inc = 'sse';
40 $sse2_x86_64_x86inc = 'sse2';
41 $ssse3_x86_64_x86inc = 'ssse3';
42 $avx_x86_64_x86inc = 'avx';
43 $avx2_x86_64_x86inc = 'avx2';
47 # Functions that are 64-bit only.
# Each *_x86_64 name starts as ''; the assignments below give it the ISA name
# when $opts{arch} is "x86_64". These names are later passed to specialize as
# interpolated strings (e.g. "$sse2_x86_64").
48 $mmx_x86_64 = $sse2_x86_64 = $ssse3_x86_64 = $avx_x86_64 = $avx2_x86_64 = '';
49 if ($opts{arch} eq "x86_64") {
51 $sse2_x86_64 = 'sse2';
52 $ssse3_x86_64 = 'ssse3';
54 $avx2_x86_64 = 'avx2';
# Post-processing entries that follow the CONFIG_VP9_POSTPROC check.
# Each entry is an add_proto call (return type and name in qw//, then the C
# argument list as a string) followed by a specialize call whose qw// list
# names the function and any variants.
# NOTE(review): add_proto and specialize are defined outside this view; the
# description above is read off the call sites only — confirm.
60 if (vpx_config("CONFIG_VP9_POSTPROC") eq "yes") {
61 add_proto qw/void vp10_mbpost_proc_down/, "uint8_t *dst, int pitch, int rows, int cols, int flimit";
62 specialize qw/vp10_mbpost_proc_down sse2/;
# The sse2 entry is pointed at a symbol with a different suffix (_xmm).
# NOTE(review): presumably read back by the generator that consumes this file — confirm.
63 $vp10_mbpost_proc_down_sse2=vp10_mbpost_proc_down_xmm;
65 add_proto qw/void vp10_mbpost_proc_across_ip/, "uint8_t *src, int pitch, int rows, int cols, int flimit";
66 specialize qw/vp10_mbpost_proc_across_ip sse2/;
67 $vp10_mbpost_proc_across_ip_sse2=vp10_mbpost_proc_across_ip_xmm;
69 add_proto qw/void vp10_post_proc_down_and_across/, "const uint8_t *src_ptr, uint8_t *dst_ptr, int src_pixels_per_line, int dst_pixels_per_line, int rows, int cols, int flimit";
70 specialize qw/vp10_post_proc_down_and_across sse2/;
71 $vp10_post_proc_down_and_across_sse2=vp10_post_proc_down_and_across_xmm;
73 add_proto qw/void vp10_plane_add_noise/, "uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch";
74 specialize qw/vp10_plane_add_noise sse2/;
# Here the sse2 entry is pointed at the _wmt symbol rather than _xmm.
75 $vp10_plane_add_noise_sse2=vp10_plane_add_noise_wmt;
77 add_proto qw/void vp10_filter_by_weight16x16/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int src_weight";
78 specialize qw/vp10_filter_by_weight16x16 sse2 msa/;
80 add_proto qw/void vp10_filter_by_weight8x8/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int src_weight";
81 specialize qw/vp10_filter_by_weight8x8 sse2 msa/;
# Transform entries that follow the CONFIG_VP9_HIGHBITDEPTH check.
# In this group the vp10_iht* entries and the vp10_highbd_fdct*_1 entries name
# no variants; every other entry names sse2.
87 if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
88 # Note: as optimized versions of these functions are added, we need to add a check to ensure
89 # that, when CONFIG_EMULATE_HARDWARE is on, they default to the C versions only.
90 add_proto qw/void vp10_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
91 specialize qw/vp10_iht4x4_16_add/;
93 add_proto qw/void vp10_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
94 specialize qw/vp10_iht8x8_64_add/;
96 add_proto qw/void vp10_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type";
97 specialize qw/vp10_iht16x16_256_add/;
99 add_proto qw/void vp10_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
100 specialize qw/vp10_fdct4x4 sse2/;
102 add_proto qw/void vp10_fdct4x4_1/, "const int16_t *input, tran_low_t *output, int stride";
103 specialize qw/vp10_fdct4x4_1 sse2/;
105 add_proto qw/void vp10_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
106 specialize qw/vp10_fdct8x8 sse2/;
108 add_proto qw/void vp10_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
109 specialize qw/vp10_fdct8x8_1 sse2/;
111 add_proto qw/void vp10_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
112 specialize qw/vp10_fdct16x16 sse2/;
114 add_proto qw/void vp10_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
115 specialize qw/vp10_fdct16x16_1 sse2/;
117 add_proto qw/void vp10_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
118 specialize qw/vp10_fdct32x32 sse2/;
120 add_proto qw/void vp10_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
121 specialize qw/vp10_fdct32x32_rd sse2/;
123 add_proto qw/void vp10_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
124 specialize qw/vp10_fdct32x32_1 sse2/;
# High-bitdepth forward DCT entries.
126 add_proto qw/void vp10_highbd_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
127 specialize qw/vp10_highbd_fdct4x4 sse2/;
129 add_proto qw/void vp10_highbd_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
130 specialize qw/vp10_highbd_fdct8x8 sse2/;
132 add_proto qw/void vp10_highbd_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
133 specialize qw/vp10_highbd_fdct8x8_1/;
135 add_proto qw/void vp10_highbd_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
136 specialize qw/vp10_highbd_fdct16x16 sse2/;
138 add_proto qw/void vp10_highbd_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
139 specialize qw/vp10_highbd_fdct16x16_1/;
141 add_proto qw/void vp10_highbd_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
142 specialize qw/vp10_highbd_fdct32x32 sse2/;
144 add_proto qw/void vp10_highbd_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
145 specialize qw/vp10_highbd_fdct32x32_rd sse2/;
147 add_proto qw/void vp10_highbd_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
148 specialize qw/vp10_highbd_fdct32x32_1/;
150 # Force C versions if CONFIG_EMULATE_HARDWARE is "yes"
# Every specialize call in this group names no variants.
# NOTE(review): the same function names are also declared in the groups
# directly before and after this one, and no else lines are visible in this
# view — confirm in the full file that only one group is active per build.
151 if (vpx_config("CONFIG_EMULATE_HARDWARE") eq "yes") {
152 add_proto qw/void vp10_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
153 specialize qw/vp10_iht4x4_16_add/;
155 add_proto qw/void vp10_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
156 specialize qw/vp10_iht8x8_64_add/;
158 add_proto qw/void vp10_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type";
159 specialize qw/vp10_iht16x16_256_add/;
161 add_proto qw/void vp10_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
162 specialize qw/vp10_fdct4x4/;
164 add_proto qw/void vp10_fdct4x4_1/, "const int16_t *input, tran_low_t *output, int stride";
165 specialize qw/vp10_fdct4x4_1/;
167 add_proto qw/void vp10_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
168 specialize qw/vp10_fdct8x8/;
170 add_proto qw/void vp10_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
171 specialize qw/vp10_fdct8x8_1/;
173 add_proto qw/void vp10_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
174 specialize qw/vp10_fdct16x16/;
176 add_proto qw/void vp10_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
177 specialize qw/vp10_fdct16x16_1/;
179 add_proto qw/void vp10_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
180 specialize qw/vp10_fdct32x32/;
182 add_proto qw/void vp10_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
183 specialize qw/vp10_fdct32x32_rd/;
185 add_proto qw/void vp10_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
186 specialize qw/vp10_fdct32x32_1/;
# The same transform names again, this time with variants listed:
# iht4x4 and iht8x8 name sse2, neon, dspr2 and msa; iht16x16 names sse2, dspr2
# and msa; all fdct entries name sse2.
# NOTE(review): presumably the branch taken when CONFIG_EMULATE_HARDWARE is
# not "yes"; the else line is not visible in this view — confirm.
188 add_proto qw/void vp10_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
189 specialize qw/vp10_iht4x4_16_add sse2 neon dspr2 msa/;
191 add_proto qw/void vp10_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
192 specialize qw/vp10_iht8x8_64_add sse2 neon dspr2 msa/;
194 add_proto qw/void vp10_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type";
195 specialize qw/vp10_iht16x16_256_add sse2 dspr2 msa/;
197 add_proto qw/void vp10_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
198 specialize qw/vp10_fdct4x4 sse2/;
200 add_proto qw/void vp10_fdct4x4_1/, "const int16_t *input, tran_low_t *output, int stride";
201 specialize qw/vp10_fdct4x4_1 sse2/;
203 add_proto qw/void vp10_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
204 specialize qw/vp10_fdct8x8 sse2/;
206 add_proto qw/void vp10_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
207 specialize qw/vp10_fdct8x8_1 sse2/;
209 add_proto qw/void vp10_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
210 specialize qw/vp10_fdct16x16 sse2/;
212 add_proto qw/void vp10_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
213 specialize qw/vp10_fdct16x16_1 sse2/;
215 add_proto qw/void vp10_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
216 specialize qw/vp10_fdct32x32 sse2/;
218 add_proto qw/void vp10_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
219 specialize qw/vp10_fdct32x32_rd sse2/;
221 add_proto qw/void vp10_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
222 specialize qw/vp10_fdct32x32_1 sse2/;
226 # High bitdepth functions
227 if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
# Convolve entries. copy and avg name no variants; the convolve8 family takes
# its variant from $sse2_x86_64, passed as an interpolated string, which is ''
# unless $opts{arch} is "x86_64".
231 add_proto qw/void vp10_highbd_convolve_copy/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
232 specialize qw/vp10_highbd_convolve_copy/;
234 add_proto qw/void vp10_highbd_convolve_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
235 specialize qw/vp10_highbd_convolve_avg/;
237 add_proto qw/void vp10_highbd_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
238 specialize qw/vp10_highbd_convolve8/, "$sse2_x86_64";
240 add_proto qw/void vp10_highbd_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
241 specialize qw/vp10_highbd_convolve8_horiz/, "$sse2_x86_64";
243 add_proto qw/void vp10_highbd_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
244 specialize qw/vp10_highbd_convolve8_vert/, "$sse2_x86_64";
246 add_proto qw/void vp10_highbd_convolve8_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
247 specialize qw/vp10_highbd_convolve8_avg/, "$sse2_x86_64";
249 add_proto qw/void vp10_highbd_convolve8_avg_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
250 specialize qw/vp10_highbd_convolve8_avg_horiz/, "$sse2_x86_64";
252 add_proto qw/void vp10_highbd_convolve8_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
253 specialize qw/vp10_highbd_convolve8_avg_vert/, "$sse2_x86_64";
# High-bitdepth post-processing entries that follow the CONFIG_VP9_POSTPROC
# check; none of them names a variant.
258 if (vpx_config("CONFIG_VP9_POSTPROC") eq "yes") {
259 add_proto qw/void vp10_highbd_mbpost_proc_down/, "uint16_t *dst, int pitch, int rows, int cols, int flimit";
260 specialize qw/vp10_highbd_mbpost_proc_down/;
262 add_proto qw/void vp10_highbd_mbpost_proc_across_ip/, "uint16_t *src, int pitch, int rows, int cols, int flimit";
263 specialize qw/vp10_highbd_mbpost_proc_across_ip/;
265 add_proto qw/void vp10_highbd_post_proc_down_and_across/, "const uint16_t *src_ptr, uint16_t *dst_ptr, int src_pixels_per_line, int dst_pixels_per_line, int rows, int cols, int flimit";
266 specialize qw/vp10_highbd_post_proc_down_and_across/;
# NOTE(review): unlike the three entries above, this prototype takes
# uint8_t *Start rather than a uint16_t pointer — confirm that is intended.
268 add_proto qw/void vp10_highbd_plane_add_noise/, "uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch";
269 specialize qw/vp10_highbd_plane_add_noise/;
275 # Note: as optimized versions of these functions are added, we need to add a check to ensure
276 # that, when CONFIG_EMULATE_HARDWARE is on, they default to the C versions only.
277 add_proto qw/void vp10_highbd_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd";
278 specialize qw/vp10_highbd_iht4x4_16_add/;
280 add_proto qw/void vp10_highbd_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd";
281 specialize qw/vp10_highbd_iht8x8_64_add/;
283 add_proto qw/void vp10_highbd_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type, int bd";
284 specialize qw/vp10_highbd_iht16x16_256_add/;
288 # Encoder functions below this point.
# The entries below follow the CONFIG_VP10_ENCODER check.
290 if (vpx_config("CONFIG_VP10_ENCODER") eq "yes") {
292 add_proto qw/unsigned int vp10_avg_8x8/, "const uint8_t *, int p";
293 specialize qw/vp10_avg_8x8 sse2 neon msa/;
295 add_proto qw/unsigned int vp10_avg_4x4/, "const uint8_t *, int p";
296 specialize qw/vp10_avg_4x4 sse2 msa/;
298 add_proto qw/void vp10_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
299 specialize qw/vp10_minmax_8x8 sse2/;
301 add_proto qw/void vp10_hadamard_8x8/, "int16_t const *src_diff, int src_stride, int16_t *coeff";
# sse2 is always named; ssse3 is added only through $ssse3_x86_64_x86inc,
# which is '' unless the x86inc and x86_64 checks earlier in the file set it.
302 specialize qw/vp10_hadamard_8x8 sse2/, "$ssse3_x86_64_x86inc";
304 add_proto qw/void vp10_hadamard_16x16/, "int16_t const *src_diff, int src_stride, int16_t *coeff";
305 specialize qw/vp10_hadamard_16x16 sse2/;
307 add_proto qw/int16_t vp10_satd/, "const int16_t *coeff, int length";
308 specialize qw/vp10_satd sse2/;
310 add_proto qw/void vp10_int_pro_row/, "int16_t *hbuf, uint8_t const *ref, const int ref_stride, const int height";
311 specialize qw/vp10_int_pro_row sse2 neon/;
313 add_proto qw/int16_t vp10_int_pro_col/, "uint8_t const *ref, const int width";
314 specialize qw/vp10_int_pro_col sse2 neon/;
316 add_proto qw/int vp10_vector_var/, "int16_t const *ref, int16_t const *src, const int bwl";
317 specialize qw/vp10_vector_var neon sse2/;
# High-bitdepth averaging entries; none of them names a variant.
319 if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
320 add_proto qw/unsigned int vp10_highbd_avg_8x8/, "const uint8_t *, int p";
321 specialize qw/vp10_highbd_avg_8x8/;
322 add_proto qw/unsigned int vp10_highbd_avg_4x4/, "const uint8_t *, int p";
323 specialize qw/vp10_highbd_avg_4x4/;
324 add_proto qw/void vp10_highbd_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
325 specialize qw/vp10_highbd_minmax_8x8/;
# Denoiser entry, following the CONFIG_VP9_TEMPORAL_DENOISING check.
333 if (vpx_config("CONFIG_VP9_TEMPORAL_DENOISING") eq "yes") {
334 add_proto qw/int vp10_denoiser_filter/, "const uint8_t *sig, int sig_stride, const uint8_t *mc_avg, int mc_avg_stride, uint8_t *avg, int avg_stride, int increase_denoising, BLOCK_SIZE bs, int motion_magnitude";
335 specialize qw/vp10_denoiser_filter sse2/;
# Block-error and quantize entries. The first group follows the
# CONFIG_VP9_HIGHBITDEPTH check and names no variants.
338 if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
339 # The transform coefficients are held in 32-bit
340 # values, so the assembler code for vp10_block_error can no longer be used.
341 add_proto qw/int64_t vp10_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
342 specialize qw/vp10_block_error/;
344 add_proto qw/void vp10_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
345 specialize qw/vp10_quantize_fp/;
347 add_proto qw/void vp10_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
348 specialize qw/vp10_quantize_fp_32x32/;
350 add_proto qw/void vp10_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
351 specialize qw/vp10_fdct8x8_quant/;
# The same names again with variants listed, plus vp10_block_error_fp, which
# appears only in this group.
# NOTE(review): presumably the non-high-bitdepth branch; the else line is not
# visible in this view — confirm.
353 add_proto qw/int64_t vp10_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
354 specialize qw/vp10_block_error avx2 msa/, "$sse2_x86inc";
356 add_proto qw/int64_t vp10_block_error_fp/, "const int16_t *coeff, const int16_t *dqcoeff, int block_size";
357 specialize qw/vp10_block_error_fp neon/, "$sse2_x86inc";
359 add_proto qw/void vp10_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
360 specialize qw/vp10_quantize_fp neon sse2/, "$ssse3_x86_64_x86inc";
362 add_proto qw/void vp10_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
363 specialize qw/vp10_quantize_fp_32x32/, "$ssse3_x86_64_x86inc";
365 add_proto qw/void vp10_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
366 specialize qw/vp10_fdct8x8_quant sse2 ssse3 neon/;
# Forward hybrid-transform (fht) and fwht entries. The first group follows the
# CONFIG_VP9_HIGHBITDEPTH check: fht names sse2, fwht takes its variant from
# $mmx_x86inc.
371 if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
372 add_proto qw/void vp10_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
373 specialize qw/vp10_fht4x4 sse2/;
375 add_proto qw/void vp10_fht8x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
376 specialize qw/vp10_fht8x8 sse2/;
378 add_proto qw/void vp10_fht16x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
379 specialize qw/vp10_fht16x16 sse2/;
381 add_proto qw/void vp10_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
382 specialize qw/vp10_fwht4x4/, "$mmx_x86inc";
# The same four names again, each with msa added to its variant list.
# NOTE(review): presumably the non-high-bitdepth branch; the else line is not
# visible in this view — confirm.
384 add_proto qw/void vp10_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
385 specialize qw/vp10_fht4x4 sse2 msa/;
387 add_proto qw/void vp10_fht8x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
388 specialize qw/vp10_fht8x8 sse2 msa/;
390 add_proto qw/void vp10_fht16x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
391 specialize qw/vp10_fht16x16 sse2 msa/;
393 add_proto qw/void vp10_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
394 specialize qw/vp10_fwht4x4 msa/, "$mmx_x86inc";
# Inverse-transform entries that follow the CONFIG_VP9_HIGHBITDEPTH check.
# No specialize call in this group names a variant.
398 if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
399 # Note: as optimized versions of these functions are added, we need to add a check to ensure
400 # that, when CONFIG_EMULATE_HARDWARE is on, they default to the C versions only.
401 add_proto qw/void vp10_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
402 specialize qw/vp10_idct4x4_1_add/;
404 add_proto qw/void vp10_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
405 specialize qw/vp10_idct4x4_16_add/;
407 add_proto qw/void vp10_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
408 specialize qw/vp10_idct8x8_1_add/;
410 add_proto qw/void vp10_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
411 specialize qw/vp10_idct8x8_64_add/;
413 add_proto qw/void vp10_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
414 specialize qw/vp10_idct8x8_12_add/;
416 add_proto qw/void vp10_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
417 specialize qw/vp10_idct16x16_1_add/;
419 add_proto qw/void vp10_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
420 specialize qw/vp10_idct16x16_256_add/;
422 add_proto qw/void vp10_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
423 specialize qw/vp10_idct16x16_10_add/;
425 add_proto qw/void vp10_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
426 specialize qw/vp10_idct32x32_1024_add/;
428 add_proto qw/void vp10_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
429 specialize qw/vp10_idct32x32_34_add/;
431 add_proto qw/void vp10_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
432 specialize qw/vp10_idct32x32_1_add/;
434 add_proto qw/void vp10_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
435 specialize qw/vp10_iwht4x4_1_add/;
437 add_proto qw/void vp10_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
438 specialize qw/vp10_iwht4x4_16_add/;
# High-bitdepth inverse transforms; their prototypes carry an extra int bd argument.
440 add_proto qw/void vp10_highbd_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
441 specialize qw/vp10_highbd_idct4x4_1_add/;
443 add_proto qw/void vp10_highbd_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
444 specialize qw/vp10_highbd_idct8x8_1_add/;
446 add_proto qw/void vp10_highbd_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
447 specialize qw/vp10_highbd_idct16x16_1_add/;
449 add_proto qw/void vp10_highbd_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
450 specialize qw/vp10_highbd_idct32x32_1024_add/;
452 add_proto qw/void vp10_highbd_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
453 specialize qw/vp10_highbd_idct32x32_34_add/;
455 add_proto qw/void vp10_highbd_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
456 specialize qw/vp10_highbd_idct32x32_1_add/;
458 add_proto qw/void vp10_highbd_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
459 specialize qw/vp10_highbd_iwht4x4_1_add/;
461 add_proto qw/void vp10_highbd_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
462 specialize qw/vp10_highbd_iwht4x4_16_add/;
464 # Force C versions if CONFIG_EMULATE_HARDWARE is "yes"
# The first five entries name no variants; the same five names are then
# declared again with sse2.
# NOTE(review): presumably an if/else pair; the else line is not visible in
# this view — confirm in the full file.
465 if (vpx_config("CONFIG_EMULATE_HARDWARE") eq "yes") {
466 add_proto qw/void vp10_highbd_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
467 specialize qw/vp10_highbd_idct4x4_16_add/;
469 add_proto qw/void vp10_highbd_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
470 specialize qw/vp10_highbd_idct8x8_64_add/;
472 add_proto qw/void vp10_highbd_idct8x8_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
473 specialize qw/vp10_highbd_idct8x8_10_add/;
475 add_proto qw/void vp10_highbd_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
476 specialize qw/vp10_highbd_idct16x16_256_add/;
478 add_proto qw/void vp10_highbd_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
479 specialize qw/vp10_highbd_idct16x16_10_add/;
# sse2 variants of the five entries above.
481 add_proto qw/void vp10_highbd_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
482 specialize qw/vp10_highbd_idct4x4_16_add sse2/;
484 add_proto qw/void vp10_highbd_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
485 specialize qw/vp10_highbd_idct8x8_64_add sse2/;
487 add_proto qw/void vp10_highbd_idct8x8_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
488 specialize qw/vp10_highbd_idct8x8_10_add sse2/;
490 add_proto qw/void vp10_highbd_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
491 specialize qw/vp10_highbd_idct16x16_256_add sse2/;
493 add_proto qw/void vp10_highbd_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
494 specialize qw/vp10_highbd_idct16x16_10_add sse2/;
495 } # CONFIG_EMULATE_HARDWARE
497 # Force C versions if CONFIG_EMULATE_HARDWARE is "yes"
# The thirteen inverse-transform entries are first declared with no variants,
# then declared again: the idct entries with sse2, the two iwht entries still
# with no variants.
# NOTE(review): presumably an if/else pair inside the non-high-bitdepth branch;
# the else lines are not visible in this view — confirm in the full file.
498 if (vpx_config("CONFIG_EMULATE_HARDWARE") eq "yes") {
499 add_proto qw/void vp10_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
500 specialize qw/vp10_idct4x4_1_add/;
502 add_proto qw/void vp10_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
503 specialize qw/vp10_idct4x4_16_add/;
505 add_proto qw/void vp10_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
506 specialize qw/vp10_idct8x8_1_add/;
508 add_proto qw/void vp10_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
509 specialize qw/vp10_idct8x8_64_add/;
511 add_proto qw/void vp10_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
512 specialize qw/vp10_idct8x8_12_add/;
514 add_proto qw/void vp10_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
515 specialize qw/vp10_idct16x16_1_add/;
517 add_proto qw/void vp10_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
518 specialize qw/vp10_idct16x16_256_add/;
520 add_proto qw/void vp10_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
521 specialize qw/vp10_idct16x16_10_add/;
523 add_proto qw/void vp10_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
524 specialize qw/vp10_idct32x32_1024_add/;
526 add_proto qw/void vp10_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
527 specialize qw/vp10_idct32x32_34_add/;
529 add_proto qw/void vp10_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
530 specialize qw/vp10_idct32x32_1_add/;
532 add_proto qw/void vp10_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
533 specialize qw/vp10_iwht4x4_1_add/;
535 add_proto qw/void vp10_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
536 specialize qw/vp10_iwht4x4_16_add/;
# Second declaration of the same names, with sse2 on the idct entries.
538 add_proto qw/void vp10_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
539 specialize qw/vp10_idct4x4_1_add sse2/;
541 add_proto qw/void vp10_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
542 specialize qw/vp10_idct4x4_16_add sse2/;
544 add_proto qw/void vp10_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
545 specialize qw/vp10_idct8x8_1_add sse2/;
547 add_proto qw/void vp10_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
548 specialize qw/vp10_idct8x8_64_add sse2/;
550 add_proto qw/void vp10_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
551 specialize qw/vp10_idct8x8_12_add sse2/;
553 add_proto qw/void vp10_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
554 specialize qw/vp10_idct16x16_1_add sse2/;
556 add_proto qw/void vp10_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
557 specialize qw/vp10_idct16x16_256_add sse2/;
559 add_proto qw/void vp10_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
560 specialize qw/vp10_idct16x16_10_add sse2/;
562 add_proto qw/void vp10_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
563 specialize qw/vp10_idct32x32_1024_add sse2/;
565 add_proto qw/void vp10_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
566 specialize qw/vp10_idct32x32_34_add sse2/;
568 add_proto qw/void vp10_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
569 specialize qw/vp10_idct32x32_1_add sse2/;
571 add_proto qw/void vp10_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
572 specialize qw/vp10_iwht4x4_1_add/;
574 add_proto qw/void vp10_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
575 specialize qw/vp10_iwht4x4_16_add/;
576 } # CONFIG_EMULATE_HARDWARE
577 } # CONFIG_VP9_HIGHBITDEPTH
# Motion-search and temporal-filter entries.
# vp10_full_search_sad names sse3 and sse4_1, and each of those entries is
# pointed at a differently named symbol (sadx3 / sadx8) by the assignments
# that follow. The diamond and full-range searches name no variants.
582 add_proto qw/int vp10_full_search_sad/, "const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv, struct mv *best_mv";
583 specialize qw/vp10_full_search_sad sse3 sse4_1/;
584 $vp10_full_search_sad_sse3=vp10_full_search_sadx3;
585 $vp10_full_search_sad_sse4_1=vp10_full_search_sadx8;
587 add_proto qw/int vp10_diamond_search_sad/, "const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv";
588 specialize qw/vp10_diamond_search_sad/;
590 add_proto qw/int vp10_full_range_search/, "const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv";
591 specialize qw/vp10_full_range_search/;
593 add_proto qw/void vp10_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count";
594 specialize qw/vp10_temporal_filter_apply sse2 msa/;
# High-bitdepth encoder entries that follow the CONFIG_VP9_HIGHBITDEPTH check.
# Only vp10_highbd_block_error names a variant (sse2); the rest name none.
596 if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
600 add_proto qw/int64_t vp10_highbd_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz, int bd";
601 specialize qw/vp10_highbd_block_error sse2/;
603 add_proto qw/void vp10_highbd_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
604 specialize qw/vp10_highbd_quantize_fp/;
606 add_proto qw/void vp10_highbd_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
607 specialize qw/vp10_highbd_quantize_fp_32x32/;
610 add_proto qw/void vp10_highbd_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
611 specialize qw/vp10_highbd_fht4x4/;
613 add_proto qw/void vp10_highbd_fht8x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
614 specialize qw/vp10_highbd_fht8x8/;
616 add_proto qw/void vp10_highbd_fht16x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
617 specialize qw/vp10_highbd_fht16x16/;
619 add_proto qw/void vp10_highbd_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
620 specialize qw/vp10_highbd_fwht4x4/;
622 add_proto qw/void vp10_highbd_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count";
623 specialize qw/vp10_highbd_temporal_filter_apply/;
626 # End vp10_high encoder functions
629 # End encoder functions