# Defines vp10_common_forward_decls and registers it through forward_decls.
# The visible body lists three #include lines and forward declarations for
# vp9_variance_vtable, search_site_config and yv12_buffer_config.
# NOTE(review): the statement that emits these lines and the end of the sub
# are not visible in this view -- confirm against the full file.
1 sub vp10_common_forward_decls() {
7 #include "vpx/vpx_integer.h"
8 #include "vp10/common/common.h"
9 #include "vp10/common/enums.h"
13 /* Encoder forward decls */
15 struct vp9_variance_vtable;
16 struct search_site_config;
19 struct yv12_buffer_config;
22 forward_decls qw/vp10_common_forward_decls/;
24 # x86inc.asm had specific constraints. Break it out so it's easy to disable.
25 # Zero all the variables to avoid tricky else conditions.
# Each *_x86inc variable starts as the empty string so that interpolating it
# into a specialize argument adds nothing unless it is set below.
26 $mmx_x86inc = $sse_x86inc = $sse2_x86inc = $ssse3_x86inc = $avx_x86inc =
28 $mmx_x86_64_x86inc = $sse_x86_64_x86inc = $sse2_x86_64_x86inc =
29 $ssse3_x86_64_x86inc = $avx_x86_64_x86inc = $avx2_x86_64_x86inc = '';
# When CONFIG_USE_X86INC is "yes", the variables are set to the name of the
# instruction set they stand for.
30 if (vpx_config("CONFIG_USE_X86INC") eq "yes") {
33 $sse2_x86inc = 'sse2';
34 $ssse3_x86inc = 'ssse3';
36 $avx2_x86inc = 'avx2';
# The *_x86_64_x86inc variants are set only when the target arch is x86_64.
37 if ($opts{arch} eq "x86_64") {
38 $mmx_x86_64_x86inc = 'mmx';
39 $sse_x86_64_x86inc = 'sse';
40 $sse2_x86_64_x86inc = 'sse2';
41 $ssse3_x86_64_x86inc = 'ssse3';
42 $avx_x86_64_x86inc = 'avx';
43 $avx2_x86_64_x86inc = 'avx2';
47 # Functions that are 64-bit only.
# The *_x86_64 variables start empty and are set to their instruction-set
# name only when the target arch is x86_64.
48 $mmx_x86_64 = $sse2_x86_64 = $ssse3_x86_64 = $avx_x86_64 = $avx2_x86_64 = '';
49 if ($opts{arch} eq "x86_64") {
51 $sse2_x86_64 = 'sse2';
52 $ssse3_x86_64 = 'ssse3';
54 $avx2_x86_64 = 'avx2';
# Post-processing prototypes, declared when CONFIG_VP9_POSTPROC is "yes".
# Each add_proto gives the return type and name, then the argument list;
# the following specialize lists the optimized variants for that function.
# NOTE(review): where this conditional closes is not visible in this view.
60 if (vpx_config("CONFIG_VP9_POSTPROC") eq "yes") {
61 add_proto qw/void vp10_mbpost_proc_down/, "uint8_t *dst, int pitch, int rows, int cols, int flimit";
62 specialize qw/vp10_mbpost_proc_down sse2/;
# The sse2 variant is assigned a differently named symbol (suffix _xmm);
# presumably this overrides the default <name>_sse2 symbol -- confirm
# against the rtcd generator.
63 $vp10_mbpost_proc_down_sse2=vp10_mbpost_proc_down_xmm;
65 add_proto qw/void vp10_mbpost_proc_across_ip/, "uint8_t *src, int pitch, int rows, int cols, int flimit";
66 specialize qw/vp10_mbpost_proc_across_ip sse2/;
67 $vp10_mbpost_proc_across_ip_sse2=vp10_mbpost_proc_across_ip_xmm;
69 add_proto qw/void vp10_post_proc_down_and_across/, "const uint8_t *src_ptr, uint8_t *dst_ptr, int src_pixels_per_line, int dst_pixels_per_line, int rows, int cols, int flimit";
70 specialize qw/vp10_post_proc_down_and_across sse2/;
71 $vp10_post_proc_down_and_across_sse2=vp10_post_proc_down_and_across_xmm;
73 add_proto qw/void vp10_plane_add_noise/, "uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch";
74 specialize qw/vp10_plane_add_noise sse2/;
# Here the sse2 variant is assigned a symbol with the suffix _wmt.
75 $vp10_plane_add_noise_sse2=vp10_plane_add_noise_wmt;
77 add_proto qw/void vp10_filter_by_weight16x16/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int src_weight";
78 specialize qw/vp10_filter_by_weight16x16 sse2 msa/;
80 add_proto qw/void vp10_filter_by_weight8x8/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int src_weight";
81 specialize qw/vp10_filter_by_weight8x8 sse2 msa/;
# vp10_iht4x4_16_add, vp10_iht8x8_64_add and vp10_iht16x16_256_add are
# declared three times below: under CONFIG_VP9_HIGHBITDEPTH and under
# CONFIG_EMULATE_HARDWARE with an empty specialization list, and finally
# with optimized specializations listed.
# NOTE(review): the `else`/closing-brace lines that would make these
# declarations mutually exclusive are not visible in this view -- confirm.
87 if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
88 # Note as optimized versions of these functions are added we need to add a check to ensure
89 # that when CONFIG_EMULATE_HARDWARE is on, it defaults to the C versions only.
90 add_proto qw/void vp10_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
91 specialize qw/vp10_iht4x4_16_add/;
93 add_proto qw/void vp10_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
94 specialize qw/vp10_iht8x8_64_add/;
96 add_proto qw/void vp10_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type";
97 specialize qw/vp10_iht16x16_256_add/;
99 # Force C versions if CONFIG_EMULATE_HARDWARE is enabled
100 if (vpx_config("CONFIG_EMULATE_HARDWARE") eq "yes") {
101 add_proto qw/void vp10_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
102 specialize qw/vp10_iht4x4_16_add/;
104 add_proto qw/void vp10_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
105 specialize qw/vp10_iht8x8_64_add/;
107 add_proto qw/void vp10_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type";
108 specialize qw/vp10_iht16x16_256_add/;
# Declarations with optimized variants listed; the 16x16 entry lists no neon.
110 add_proto qw/void vp10_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
111 specialize qw/vp10_iht4x4_16_add sse2 neon dspr2 msa/;
113 add_proto qw/void vp10_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
114 specialize qw/vp10_iht8x8_64_add sse2 neon dspr2 msa/;
116 add_proto qw/void vp10_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type";
117 specialize qw/vp10_iht16x16_256_add sse2 dspr2 msa/;
121 # High bitdepth functions
# vp10_highbd_convolve* prototypes, declared when CONFIG_VP9_HIGHBITDEPTH is
# "yes". All share one argument list ending in `int bps`. The copy and avg
# entries list no specialization; the convolve8 entries pass $sse2_x86_64,
# which is 'sse2' on x86_64 and the empty string otherwise.
122 if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
126 add_proto qw/void vp10_highbd_convolve_copy/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
127 specialize qw/vp10_highbd_convolve_copy/;
129 add_proto qw/void vp10_highbd_convolve_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
130 specialize qw/vp10_highbd_convolve_avg/;
132 add_proto qw/void vp10_highbd_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
133 specialize qw/vp10_highbd_convolve8/, "$sse2_x86_64";
135 add_proto qw/void vp10_highbd_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
136 specialize qw/vp10_highbd_convolve8_horiz/, "$sse2_x86_64";
138 add_proto qw/void vp10_highbd_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
139 specialize qw/vp10_highbd_convolve8_vert/, "$sse2_x86_64";
141 add_proto qw/void vp10_highbd_convolve8_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
142 specialize qw/vp10_highbd_convolve8_avg/, "$sse2_x86_64";
144 add_proto qw/void vp10_highbd_convolve8_avg_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
145 specialize qw/vp10_highbd_convolve8_avg_horiz/, "$sse2_x86_64";
147 add_proto qw/void vp10_highbd_convolve8_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
148 specialize qw/vp10_highbd_convolve8_avg_vert/, "$sse2_x86_64";
# vp10_highbd_* post-processing prototypes, declared when CONFIG_VP9_POSTPROC
# is "yes". None of them lists an optimized specialization.
153 if (vpx_config("CONFIG_VP9_POSTPROC") eq "yes") {
154 add_proto qw/void vp10_highbd_mbpost_proc_down/, "uint16_t *dst, int pitch, int rows, int cols, int flimit";
155 specialize qw/vp10_highbd_mbpost_proc_down/;
157 add_proto qw/void vp10_highbd_mbpost_proc_across_ip/, "uint16_t *src, int pitch, int rows, int cols, int flimit";
158 specialize qw/vp10_highbd_mbpost_proc_across_ip/;
160 add_proto qw/void vp10_highbd_post_proc_down_and_across/, "const uint16_t *src_ptr, uint16_t *dst_ptr, int src_pixels_per_line, int dst_pixels_per_line, int rows, int cols, int flimit";
161 specialize qw/vp10_highbd_post_proc_down_and_across/;
# NOTE(review): this prototype takes `uint8_t *Start` while the three above
# take uint16_t pointers -- confirm that is intended.
163 add_proto qw/void vp10_highbd_plane_add_noise/, "uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch";
164 specialize qw/vp10_highbd_plane_add_noise/;
170 # Note as optimized versions of these functions are added we need to add a check to ensure
171 # that when CONFIG_EMULATE_HARDWARE is on, it defaults to the C versions only.
# The vp10_highbd_iht*_add prototypes take a trailing `int bd` argument and
# are declared with an empty specialization list.
172 add_proto qw/void vp10_highbd_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd";
173 specialize qw/vp10_highbd_iht4x4_16_add/;
175 add_proto qw/void vp10_highbd_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd";
176 specialize qw/vp10_highbd_iht8x8_64_add/;
178 add_proto qw/void vp10_highbd_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type, int bd";
179 specialize qw/vp10_highbd_iht16x16_256_add/;
183 # Encoder functions below this point.
# The prototypes that follow are guarded by CONFIG_VP10_ENCODER being "yes".
# NOTE(review): where this conditional closes is not visible in this view.
185 if (vpx_config("CONFIG_VP10_ENCODER") eq "yes") {
187 add_proto qw/unsigned int vp10_avg_8x8/, "const uint8_t *, int p";
188 specialize qw/vp10_avg_8x8 sse2 neon msa/;
190 add_proto qw/unsigned int vp10_avg_4x4/, "const uint8_t *, int p";
191 specialize qw/vp10_avg_4x4 sse2 msa/;
193 add_proto qw/void vp10_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
194 specialize qw/vp10_minmax_8x8 sse2/;
196 add_proto qw/void vp10_hadamard_8x8/, "int16_t const *src_diff, int src_stride, int16_t *coeff";
# $ssse3_x86_64_x86inc is 'ssse3' only when CONFIG_USE_X86INC is "yes" and
# the arch is x86_64; otherwise it is the empty string.
197 specialize qw/vp10_hadamard_8x8 sse2/, "$ssse3_x86_64_x86inc";
199 add_proto qw/void vp10_hadamard_16x16/, "int16_t const *src_diff, int src_stride, int16_t *coeff";
200 specialize qw/vp10_hadamard_16x16 sse2/;
202 add_proto qw/int16_t vp10_satd/, "const int16_t *coeff, int length";
203 specialize qw/vp10_satd sse2/;
205 add_proto qw/void vp10_int_pro_row/, "int16_t *hbuf, uint8_t const *ref, const int ref_stride, const int height";
206 specialize qw/vp10_int_pro_row sse2 neon/;
208 add_proto qw/int16_t vp10_int_pro_col/, "uint8_t const *ref, const int width";
209 specialize qw/vp10_int_pro_col sse2 neon/;
211 add_proto qw/int vp10_vector_var/, "int16_t const *ref, int16_t const *src, const int bwl";
212 specialize qw/vp10_vector_var neon sse2/;
# vp10_highbd_* counterparts of avg/minmax, declared only under
# CONFIG_VP9_HIGHBITDEPTH and without optimized specializations.
214 if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
215 add_proto qw/unsigned int vp10_highbd_avg_8x8/, "const uint8_t *, int p";
216 specialize qw/vp10_highbd_avg_8x8/;
217 add_proto qw/unsigned int vp10_highbd_avg_4x4/, "const uint8_t *, int p";
218 specialize qw/vp10_highbd_avg_4x4/;
219 add_proto qw/void vp10_highbd_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
220 specialize qw/vp10_highbd_minmax_8x8/;
# vp10_denoiser_filter is declared only when CONFIG_VP9_TEMPORAL_DENOISING is
# "yes"; sse2 is its only listed specialization.
228 if (vpx_config("CONFIG_VP9_TEMPORAL_DENOISING") eq "yes") {
229 add_proto qw/int vp10_denoiser_filter/, "const uint8_t *sig, int sig_stride, const uint8_t *mc_avg, int mc_avg_stride, uint8_t *avg, int avg_stride, int increase_denoising, BLOCK_SIZE bs, int motion_magnitude";
230 specialize qw/vp10_denoiser_filter sse2/;
# vp10_block_error, vp10_quantize_fp, vp10_quantize_fp_32x32 and
# vp10_fdct8x8_quant are declared twice: first under CONFIG_VP9_HIGHBITDEPTH
# with an empty specialization list, then with optimized variants listed.
# NOTE(review): the `else` separating the two groups is not visible in this
# view -- confirm against the full file.
233 if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
234 # the transform coefficients are held in 32-bit
235 # values, so the assembler code for vp10_block_error can no longer be used.
236 add_proto qw/int64_t vp10_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
237 specialize qw/vp10_block_error/;
239 add_proto qw/void vp10_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
240 specialize qw/vp10_quantize_fp/;
242 add_proto qw/void vp10_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
243 specialize qw/vp10_quantize_fp_32x32/;
245 add_proto qw/void vp10_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
246 specialize qw/vp10_fdct8x8_quant/;
# Second group: optimized variants are listed. $sse2_x86inc and
# $ssse3_x86_64_x86inc expand to an instruction-set name or to the empty
# string depending on CONFIG_USE_X86INC (and, for the latter, the arch).
248 add_proto qw/int64_t vp10_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
249 specialize qw/vp10_block_error avx2 msa/, "$sse2_x86inc";
# vp10_block_error_fp is declared only in this second group; it takes
# int16_t coefficients and has no ssz argument.
251 add_proto qw/int64_t vp10_block_error_fp/, "const int16_t *coeff, const int16_t *dqcoeff, int block_size";
252 specialize qw/vp10_block_error_fp neon/, "$sse2_x86inc";
254 add_proto qw/void vp10_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
255 specialize qw/vp10_quantize_fp neon sse2/, "$ssse3_x86_64_x86inc";
257 add_proto qw/void vp10_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
258 specialize qw/vp10_quantize_fp_32x32/, "$ssse3_x86_64_x86inc";
260 add_proto qw/void vp10_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
261 specialize qw/vp10_fdct8x8_quant sse2 ssse3 neon/;
# vp10_fht4x4, vp10_fht8x8, vp10_fht16x16 and vp10_fwht4x4 are declared
# twice. The first group (under CONFIG_VP9_HIGHBITDEPTH) lists sse2 for the
# fht entries; the second group additionally lists msa on every entry.
# vp10_fwht4x4 passes $mmx_x86inc in both groups.
# NOTE(review): the `else` separating the two groups is not visible in this
# view -- confirm against the full file.
266 if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
267 add_proto qw/void vp10_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
268 specialize qw/vp10_fht4x4 sse2/;
270 add_proto qw/void vp10_fht8x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
271 specialize qw/vp10_fht8x8 sse2/;
273 add_proto qw/void vp10_fht16x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
274 specialize qw/vp10_fht16x16 sse2/;
276 add_proto qw/void vp10_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
277 specialize qw/vp10_fwht4x4/, "$mmx_x86inc";
# Second group: same prototypes with msa added to each specialization list.
279 add_proto qw/void vp10_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
280 specialize qw/vp10_fht4x4 sse2 msa/;
282 add_proto qw/void vp10_fht8x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
283 specialize qw/vp10_fht8x8 sse2 msa/;
285 add_proto qw/void vp10_fht16x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
286 specialize qw/vp10_fht16x16 sse2 msa/;
288 add_proto qw/void vp10_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
289 specialize qw/vp10_fwht4x4 msa/, "$mmx_x86inc";
# Inverse-transform prototypes for builds with CONFIG_VP9_HIGHBITDEPTH set to
# "yes". The vp10_idct*/vp10_iwht* entries and the first group of
# vp10_highbd_* entries are declared with an empty specialization list.
# The vp10_highbd_* prototypes carry a trailing `int bd` argument.
293 if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
294 # Note as optimized versions of these functions are added we need to add a check to ensure
295 # that when CONFIG_EMULATE_HARDWARE is on, it defaults to the C versions only.
296 add_proto qw/void vp10_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
297 specialize qw/vp10_idct4x4_1_add/;
299 add_proto qw/void vp10_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
300 specialize qw/vp10_idct4x4_16_add/;
302 add_proto qw/void vp10_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
303 specialize qw/vp10_idct8x8_1_add/;
305 add_proto qw/void vp10_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
306 specialize qw/vp10_idct8x8_64_add/;
308 add_proto qw/void vp10_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
309 specialize qw/vp10_idct8x8_12_add/;
311 add_proto qw/void vp10_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
312 specialize qw/vp10_idct16x16_1_add/;
314 add_proto qw/void vp10_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
315 specialize qw/vp10_idct16x16_256_add/;
317 add_proto qw/void vp10_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
318 specialize qw/vp10_idct16x16_10_add/;
320 add_proto qw/void vp10_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
321 specialize qw/vp10_idct32x32_1024_add/;
323 add_proto qw/void vp10_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
324 specialize qw/vp10_idct32x32_34_add/;
326 add_proto qw/void vp10_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
327 specialize qw/vp10_idct32x32_1_add/;
329 add_proto qw/void vp10_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
330 specialize qw/vp10_iwht4x4_1_add/;
332 add_proto qw/void vp10_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
333 specialize qw/vp10_iwht4x4_16_add/;
335 add_proto qw/void vp10_highbd_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
336 specialize qw/vp10_highbd_idct4x4_1_add/;
338 add_proto qw/void vp10_highbd_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
339 specialize qw/vp10_highbd_idct8x8_1_add/;
341 add_proto qw/void vp10_highbd_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
342 specialize qw/vp10_highbd_idct16x16_1_add/;
344 add_proto qw/void vp10_highbd_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
345 specialize qw/vp10_highbd_idct32x32_1024_add/;
347 add_proto qw/void vp10_highbd_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
348 specialize qw/vp10_highbd_idct32x32_34_add/;
350 add_proto qw/void vp10_highbd_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
351 specialize qw/vp10_highbd_idct32x32_1_add/;
353 add_proto qw/void vp10_highbd_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
354 specialize qw/vp10_highbd_iwht4x4_1_add/;
356 add_proto qw/void vp10_highbd_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
357 specialize qw/vp10_highbd_iwht4x4_16_add/;
359 # Force C versions if CONFIG_EMULATE_HARDWARE is enabled
360 if (vpx_config("CONFIG_EMULATE_HARDWARE") eq "yes") {
361 add_proto qw/void vp10_highbd_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
362 specialize qw/vp10_highbd_idct4x4_16_add/;
364 add_proto qw/void vp10_highbd_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
365 specialize qw/vp10_highbd_idct8x8_64_add/;
367 add_proto qw/void vp10_highbd_idct8x8_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
368 specialize qw/vp10_highbd_idct8x8_10_add/;
370 add_proto qw/void vp10_highbd_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
371 specialize qw/vp10_highbd_idct16x16_256_add/;
373 add_proto qw/void vp10_highbd_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
374 specialize qw/vp10_highbd_idct16x16_10_add/;
# The same five vp10_highbd_idct prototypes again, now listing sse2.
# NOTE(review): the `else` separating them from the C-only declarations
# above is not visible in this view -- confirm against the full file.
376 add_proto qw/void vp10_highbd_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
377 specialize qw/vp10_highbd_idct4x4_16_add sse2/;
379 add_proto qw/void vp10_highbd_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
380 specialize qw/vp10_highbd_idct8x8_64_add sse2/;
382 add_proto qw/void vp10_highbd_idct8x8_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
383 specialize qw/vp10_highbd_idct8x8_10_add sse2/;
385 add_proto qw/void vp10_highbd_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
386 specialize qw/vp10_highbd_idct16x16_256_add sse2/;
388 add_proto qw/void vp10_highbd_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
389 specialize qw/vp10_highbd_idct16x16_10_add sse2/;
390 } # CONFIG_EMULATE_HARDWARE
# Second set of vp10_idct*/vp10_iwht* declarations. Under
# CONFIG_EMULATE_HARDWARE each prototype gets an empty specialization list;
# the repeated declarations further down list sse2 for every vp10_idct entry
# and nothing for the two vp10_iwht4x4 entries.
# NOTE(review): the `else` lines that would separate this set from the
# preceding one, and the two groups below from each other, are not visible
# in this view -- confirm against the full file.
392 # Force C versions if CONFIG_EMULATE_HARDWARE is enabled
393 if (vpx_config("CONFIG_EMULATE_HARDWARE") eq "yes") {
394 add_proto qw/void vp10_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
395 specialize qw/vp10_idct4x4_1_add/;
397 add_proto qw/void vp10_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
398 specialize qw/vp10_idct4x4_16_add/;
400 add_proto qw/void vp10_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
401 specialize qw/vp10_idct8x8_1_add/;
403 add_proto qw/void vp10_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
404 specialize qw/vp10_idct8x8_64_add/;
406 add_proto qw/void vp10_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
407 specialize qw/vp10_idct8x8_12_add/;
409 add_proto qw/void vp10_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
410 specialize qw/vp10_idct16x16_1_add/;
412 add_proto qw/void vp10_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
413 specialize qw/vp10_idct16x16_256_add/;
415 add_proto qw/void vp10_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
416 specialize qw/vp10_idct16x16_10_add/;
418 add_proto qw/void vp10_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
419 specialize qw/vp10_idct32x32_1024_add/;
421 add_proto qw/void vp10_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
422 specialize qw/vp10_idct32x32_34_add/;
424 add_proto qw/void vp10_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
425 specialize qw/vp10_idct32x32_1_add/;
427 add_proto qw/void vp10_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
428 specialize qw/vp10_iwht4x4_1_add/;
430 add_proto qw/void vp10_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
431 specialize qw/vp10_iwht4x4_16_add/;
# Repeated declarations with sse2 listed for the vp10_idct entries.
433 add_proto qw/void vp10_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
434 specialize qw/vp10_idct4x4_1_add sse2/;
436 add_proto qw/void vp10_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
437 specialize qw/vp10_idct4x4_16_add sse2/;
439 add_proto qw/void vp10_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
440 specialize qw/vp10_idct8x8_1_add sse2/;
442 add_proto qw/void vp10_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
443 specialize qw/vp10_idct8x8_64_add sse2/;
445 add_proto qw/void vp10_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
446 specialize qw/vp10_idct8x8_12_add sse2/;
448 add_proto qw/void vp10_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
449 specialize qw/vp10_idct16x16_1_add sse2/;
451 add_proto qw/void vp10_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
452 specialize qw/vp10_idct16x16_256_add sse2/;
454 add_proto qw/void vp10_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
455 specialize qw/vp10_idct16x16_10_add sse2/;
457 add_proto qw/void vp10_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
458 specialize qw/vp10_idct32x32_1024_add sse2/;
460 add_proto qw/void vp10_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
461 specialize qw/vp10_idct32x32_34_add sse2/;
463 add_proto qw/void vp10_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
464 specialize qw/vp10_idct32x32_1_add sse2/;
# The two vp10_iwht4x4 entries list no optimized variant here either.
466 add_proto qw/void vp10_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
467 specialize qw/vp10_iwht4x4_1_add/;
469 add_proto qw/void vp10_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
470 specialize qw/vp10_iwht4x4_16_add/;
471 } # CONFIG_EMULATE_HARDWARE
472 } # CONFIG_VP9_HIGHBITDEPTH
# Motion-search and temporal-filter prototypes. The argument lists refer to
# struct macroblock, struct mv, struct search_site_config and
# struct vp9_variance_vtable by pointer.
477 add_proto qw/int vp10_full_search_sad/, "const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv, struct mv *best_mv";
478 specialize qw/vp10_full_search_sad sse3 sse4_1/;
# The sse3 and sse4_1 variants are assigned the names vp10_full_search_sadx3
# and vp10_full_search_sadx8; presumably this overrides the default
# <name>_<isa> symbols -- confirm against the rtcd generator.
479 $vp10_full_search_sad_sse3=vp10_full_search_sadx3;
480 $vp10_full_search_sad_sse4_1=vp10_full_search_sadx8;
482 add_proto qw/int vp10_diamond_search_sad/, "const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv";
483 specialize qw/vp10_diamond_search_sad/;
# vp10_full_range_search has the same argument list as
# vp10_diamond_search_sad and likewise lists no optimized variant.
485 add_proto qw/int vp10_full_range_search/, "const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv";
486 specialize qw/vp10_full_range_search/;
488 add_proto qw/void vp10_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count";
489 specialize qw/vp10_temporal_filter_apply sse2 msa/;
# vp10_highbd_* encoder prototypes, declared when CONFIG_VP9_HIGHBITDEPTH is
# "yes". Only vp10_highbd_block_error lists an optimized variant (sse2); the
# rest are declared with an empty specialization list.
# NOTE(review): the closing braces for this conditional and for the encoder
# section are not visible in this view.
491 if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
495 add_proto qw/int64_t vp10_highbd_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz, int bd";
496 specialize qw/vp10_highbd_block_error sse2/;
498 add_proto qw/void vp10_highbd_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
499 specialize qw/vp10_highbd_quantize_fp/;
501 add_proto qw/void vp10_highbd_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
502 specialize qw/vp10_highbd_quantize_fp_32x32/;
505 add_proto qw/void vp10_highbd_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
506 specialize qw/vp10_highbd_fht4x4/;
508 add_proto qw/void vp10_highbd_fht8x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
509 specialize qw/vp10_highbd_fht8x8/;
511 add_proto qw/void vp10_highbd_fht16x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
512 specialize qw/vp10_highbd_fht16x16/;
514 add_proto qw/void vp10_highbd_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
515 specialize qw/vp10_highbd_fwht4x4/;
517 add_proto qw/void vp10_highbd_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count";
518 specialize qw/vp10_highbd_temporal_filter_apply/;
521 # End vp10_high encoder functions
524 # end encoder functions