]> granicus.if.org Git - libvpx/blob - test/convolve_test.cc
Merge "build/make/configure.sh: Fix armv7 builds in Xcode7."
[libvpx] / test / convolve_test.cc
1 /*
2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10
11 #include <string.h>
12
13 #include "third_party/googletest/src/include/gtest/gtest.h"
14
15 #include "./vpx_config.h"
16 #include "./vp9_rtcd.h"
17 #include "./vpx_dsp_rtcd.h"
18 #include "test/acm_random.h"
19 #include "test/clear_system_state.h"
20 #include "test/register_state_check.h"
21 #include "test/util.h"
22 #include "vp9/common/vp9_common.h"
23 #include "vp9/common/vp9_filter.h"
24 #include "vpx_dsp/vpx_dsp_common.h"
25 #include "vpx_dsp/vpx_filter.h"
26 #include "vpx_mem/vpx_mem.h"
27 #include "vpx_ports/mem.h"
28
29 namespace {
30
// Largest block width/height, in pixels, that the reference filters in this
// file size their scratch buffers for.
static const unsigned int kMaxDimension = 64;

// Common signature of every convolve routine exercised by these tests: given
// a source block and one kernel per direction, produce a w x h block at dst.
// NOTE(review): the tests pass 16 (or 0 for the plain copy/avg paths) for both
// *_stride arguments; presumably a sub-pixel step rather than a byte stride --
// confirm against the library prototypes.
typedef void (*ConvolveFunc)(const uint8_t *src, ptrdiff_t src_stride,
                             uint8_t *dst, ptrdiff_t dst_stride,
                             const int16_t *filter_x, int filter_x_stride,
                             const int16_t *filter_y, int filter_y_stride,
                             int w, int h);
38
39 struct ConvolveFunctions {
40   ConvolveFunctions(ConvolveFunc copy, ConvolveFunc avg,
41                     ConvolveFunc h8, ConvolveFunc h8_avg,
42                     ConvolveFunc v8, ConvolveFunc v8_avg,
43                     ConvolveFunc hv8, ConvolveFunc hv8_avg,
44                     ConvolveFunc sh8, ConvolveFunc sh8_avg,
45                     ConvolveFunc sv8, ConvolveFunc sv8_avg,
46                     ConvolveFunc shv8, ConvolveFunc shv8_avg,
47                     int bd)
48       : copy_(copy), avg_(avg), h8_(h8), v8_(v8), hv8_(hv8), h8_avg_(h8_avg),
49         v8_avg_(v8_avg), hv8_avg_(hv8_avg), sh8_(sh8), sv8_(sv8), shv8_(shv8),
50         sh8_avg_(sh8_avg), sv8_avg_(sv8_avg), shv8_avg_(shv8_avg),
51         use_highbd_(bd) {}
52
53   ConvolveFunc copy_;
54   ConvolveFunc avg_;
55   ConvolveFunc h8_;
56   ConvolveFunc v8_;
57   ConvolveFunc hv8_;
58   ConvolveFunc h8_avg_;
59   ConvolveFunc v8_avg_;
60   ConvolveFunc hv8_avg_;
61   ConvolveFunc sh8_;        // scaled horiz
62   ConvolveFunc sv8_;        // scaled vert
63   ConvolveFunc shv8_;       // scaled horiz/vert
64   ConvolveFunc sh8_avg_;    // scaled avg horiz
65   ConvolveFunc sv8_avg_;    // scaled avg vert
66   ConvolveFunc shv8_avg_;   // scaled avg horiz/vert
67   int use_highbd_;  // 0 if high bitdepth not used, else the actual bit depth.
68 };
69
// Test parameter: <block width, block height, function table under test>.
typedef std::tr1::tuple<int, int, const ConvolveFunctions *> ConvolveParam;
71
// Fixed-point parameters of the reference 8-tap subpixel filter (slightly
// adapted to fit into this test): taps are scaled by VP9_FILTER_WEIGHT, i.e.
// 1 << VP9_FILTER_SHIFT.
#define VP9_FILTER_WEIGHT 128
#define VP9_FILTER_SHIFT 7

// Saturates x to the 8-bit pixel range [0, 255].
uint8_t clip_pixel(int x) {
  if (x < 0) {
    return 0;
  }
  if (x > 255) {
    return 255;
  }
  return x;
}
80
81 void filter_block2d_8_c(const uint8_t *src_ptr,
82                         const unsigned int src_stride,
83                         const int16_t *HFilter,
84                         const int16_t *VFilter,
85                         uint8_t *dst_ptr,
86                         unsigned int dst_stride,
87                         unsigned int output_width,
88                         unsigned int output_height) {
89   // Between passes, we use an intermediate buffer whose height is extended to
90   // have enough horizontally filtered values as input for the vertical pass.
91   // This buffer is allocated to be big enough for the largest block type we
92   // support.
93   const int kInterp_Extend = 4;
94   const unsigned int intermediate_height =
95       (kInterp_Extend - 1) + output_height + kInterp_Extend;
96   unsigned int i, j;
97
98   // Size of intermediate_buffer is max_intermediate_height * filter_max_width,
99   // where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height
100   //                                 + kInterp_Extend
101   //                               = 3 + 16 + 4
102   //                               = 23
103   // and filter_max_width          = 16
104   //
105   uint8_t intermediate_buffer[71 * kMaxDimension];
106   const int intermediate_next_stride = 1 - intermediate_height * output_width;
107
108   // Horizontal pass (src -> transposed intermediate).
109   uint8_t *output_ptr = intermediate_buffer;
110   const int src_next_row_stride = src_stride - output_width;
111   src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1);
112   for (i = 0; i < intermediate_height; ++i) {
113     for (j = 0; j < output_width; ++j) {
114       // Apply filter...
115       const int temp = (src_ptr[0] * HFilter[0]) +
116           (src_ptr[1] * HFilter[1]) +
117           (src_ptr[2] * HFilter[2]) +
118           (src_ptr[3] * HFilter[3]) +
119           (src_ptr[4] * HFilter[4]) +
120           (src_ptr[5] * HFilter[5]) +
121           (src_ptr[6] * HFilter[6]) +
122           (src_ptr[7] * HFilter[7]) +
123           (VP9_FILTER_WEIGHT >> 1);  // Rounding
124
125       // Normalize back to 0-255...
126       *output_ptr = clip_pixel(temp >> VP9_FILTER_SHIFT);
127       ++src_ptr;
128       output_ptr += intermediate_height;
129     }
130     src_ptr += src_next_row_stride;
131     output_ptr += intermediate_next_stride;
132   }
133
134   // Vertical pass (transposed intermediate -> dst).
135   src_ptr = intermediate_buffer;
136   const int dst_next_row_stride = dst_stride - output_width;
137   for (i = 0; i < output_height; ++i) {
138     for (j = 0; j < output_width; ++j) {
139       // Apply filter...
140       const int temp = (src_ptr[0] * VFilter[0]) +
141           (src_ptr[1] * VFilter[1]) +
142           (src_ptr[2] * VFilter[2]) +
143           (src_ptr[3] * VFilter[3]) +
144           (src_ptr[4] * VFilter[4]) +
145           (src_ptr[5] * VFilter[5]) +
146           (src_ptr[6] * VFilter[6]) +
147           (src_ptr[7] * VFilter[7]) +
148           (VP9_FILTER_WEIGHT >> 1);  // Rounding
149
150       // Normalize back to 0-255...
151       *dst_ptr++ = clip_pixel(temp >> VP9_FILTER_SHIFT);
152       src_ptr += intermediate_height;
153     }
154     src_ptr += intermediate_next_stride;
155     dst_ptr += dst_next_row_stride;
156   }
157 }
158
// Replaces every pixel of the output block with the rounded-up mean of itself
// and the co-located pixel of src: out = (out + src + 1) >> 1.
void block2d_average_c(uint8_t *src,
                       unsigned int src_stride,
                       uint8_t *output_ptr,
                       unsigned int output_stride,
                       unsigned int output_width,
                       unsigned int output_height) {
  for (unsigned int row = 0; row < output_height; ++row) {
    const uint8_t *const src_row = src + row * src_stride;
    uint8_t *const out_row = output_ptr + row * output_stride;
    for (unsigned int col = 0; col < output_width; ++col) {
      const int sum = out_row[col] + src_row[col] + 1;
      out_row[col] = sum >> 1;
    }
  }
}
173
174 void filter_average_block2d_8_c(const uint8_t *src_ptr,
175                                 const unsigned int src_stride,
176                                 const int16_t *HFilter,
177                                 const int16_t *VFilter,
178                                 uint8_t *dst_ptr,
179                                 unsigned int dst_stride,
180                                 unsigned int output_width,
181                                 unsigned int output_height) {
182   uint8_t tmp[kMaxDimension * kMaxDimension];
183
184   assert(output_width <= kMaxDimension);
185   assert(output_height <= kMaxDimension);
186   filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp, 64,
187                      output_width, output_height);
188   block2d_average_c(tmp, 64, dst_ptr, dst_stride,
189                     output_width, output_height);
190 }
191
192 #if CONFIG_VP9_HIGHBITDEPTH
193 void highbd_filter_block2d_8_c(const uint16_t *src_ptr,
194                                const unsigned int src_stride,
195                                const int16_t *HFilter,
196                                const int16_t *VFilter,
197                                uint16_t *dst_ptr,
198                                unsigned int dst_stride,
199                                unsigned int output_width,
200                                unsigned int output_height,
201                                int bd) {
202   // Between passes, we use an intermediate buffer whose height is extended to
203   // have enough horizontally filtered values as input for the vertical pass.
204   // This buffer is allocated to be big enough for the largest block type we
205   // support.
206   const int kInterp_Extend = 4;
207   const unsigned int intermediate_height =
208       (kInterp_Extend - 1) + output_height + kInterp_Extend;
209
210   /* Size of intermediate_buffer is max_intermediate_height * filter_max_width,
211    * where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height
212    *                                 + kInterp_Extend
213    *                               = 3 + 16 + 4
214    *                               = 23
215    * and filter_max_width = 16
216    */
217   uint16_t intermediate_buffer[71 * kMaxDimension];
218   const int intermediate_next_stride = 1 - intermediate_height * output_width;
219
220   // Horizontal pass (src -> transposed intermediate).
221   {
222     uint16_t *output_ptr = intermediate_buffer;
223     const int src_next_row_stride = src_stride - output_width;
224     unsigned int i, j;
225     src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1);
226     for (i = 0; i < intermediate_height; ++i) {
227       for (j = 0; j < output_width; ++j) {
228         // Apply filter...
229         const int temp = (src_ptr[0] * HFilter[0]) +
230                          (src_ptr[1] * HFilter[1]) +
231                          (src_ptr[2] * HFilter[2]) +
232                          (src_ptr[3] * HFilter[3]) +
233                          (src_ptr[4] * HFilter[4]) +
234                          (src_ptr[5] * HFilter[5]) +
235                          (src_ptr[6] * HFilter[6]) +
236                          (src_ptr[7] * HFilter[7]) +
237                          (VP9_FILTER_WEIGHT >> 1);  // Rounding
238
239         // Normalize back to 0-255...
240         *output_ptr = clip_pixel_highbd(temp >> VP9_FILTER_SHIFT, bd);
241         ++src_ptr;
242         output_ptr += intermediate_height;
243       }
244       src_ptr += src_next_row_stride;
245       output_ptr += intermediate_next_stride;
246     }
247   }
248
249   // Vertical pass (transposed intermediate -> dst).
250   {
251     uint16_t *src_ptr = intermediate_buffer;
252     const int dst_next_row_stride = dst_stride - output_width;
253     unsigned int i, j;
254     for (i = 0; i < output_height; ++i) {
255       for (j = 0; j < output_width; ++j) {
256         // Apply filter...
257         const int temp = (src_ptr[0] * VFilter[0]) +
258                          (src_ptr[1] * VFilter[1]) +
259                          (src_ptr[2] * VFilter[2]) +
260                          (src_ptr[3] * VFilter[3]) +
261                          (src_ptr[4] * VFilter[4]) +
262                          (src_ptr[5] * VFilter[5]) +
263                          (src_ptr[6] * VFilter[6]) +
264                          (src_ptr[7] * VFilter[7]) +
265                          (VP9_FILTER_WEIGHT >> 1);  // Rounding
266
267         // Normalize back to 0-255...
268         *dst_ptr++ = clip_pixel_highbd(temp >> VP9_FILTER_SHIFT, bd);
269         src_ptr += intermediate_height;
270       }
271       src_ptr += intermediate_next_stride;
272       dst_ptr += dst_next_row_stride;
273     }
274   }
275 }
276
// 16-bit counterpart of block2d_average_c(): replaces every sample of the
// output block with (out + src + 1) >> 1. The bd argument is accepted for
// signature symmetry with the other highbd helpers but is not used.
void highbd_block2d_average_c(uint16_t *src,
                              unsigned int src_stride,
                              uint16_t *output_ptr,
                              unsigned int output_stride,
                              unsigned int output_width,
                              unsigned int output_height,
                              int bd) {
  (void)bd;
  for (unsigned int row = 0; row < output_height; ++row) {
    const uint16_t *const src_row = src + row * src_stride;
    uint16_t *const out_row = output_ptr + row * output_stride;
    for (unsigned int col = 0; col < output_width; ++col) {
      const int sum = out_row[col] + src_row[col] + 1;
      out_row[col] = sum >> 1;
    }
  }
}
292
293 void highbd_filter_average_block2d_8_c(const uint16_t *src_ptr,
294                                        const unsigned int src_stride,
295                                        const int16_t *HFilter,
296                                        const int16_t *VFilter,
297                                        uint16_t *dst_ptr,
298                                        unsigned int dst_stride,
299                                        unsigned int output_width,
300                                        unsigned int output_height,
301                                        int bd) {
302   uint16_t tmp[kMaxDimension * kMaxDimension];
303
304   assert(output_width <= kMaxDimension);
305   assert(output_height <= kMaxDimension);
306   highbd_filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp, 64,
307                             output_width, output_height, bd);
308   highbd_block2d_average_c(tmp, 64, dst_ptr, dst_stride,
309                            output_width, output_height, bd);
310 }
311 #endif  // CONFIG_VP9_HIGHBITDEPTH
312
313 class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
314  public:
315   static void SetUpTestCase() {
316     // Force input_ to be unaligned, output to be 16 byte aligned.
317     input_ = reinterpret_cast<uint8_t*>(
318         vpx_memalign(kDataAlignment, kInputBufferSize + 1)) + 1;
319     output_ = reinterpret_cast<uint8_t*>(
320         vpx_memalign(kDataAlignment, kOutputBufferSize));
321     output_ref_ = reinterpret_cast<uint8_t*>(
322         vpx_memalign(kDataAlignment, kOutputBufferSize));
323 #if CONFIG_VP9_HIGHBITDEPTH
324     input16_ = reinterpret_cast<uint16_t*>(
325         vpx_memalign(kDataAlignment,
326                      (kInputBufferSize + 1) * sizeof(uint16_t))) + 1;
327     output16_ = reinterpret_cast<uint16_t*>(
328         vpx_memalign(kDataAlignment, (kOutputBufferSize) * sizeof(uint16_t)));
329     output16_ref_ = reinterpret_cast<uint16_t*>(
330         vpx_memalign(kDataAlignment, (kOutputBufferSize) * sizeof(uint16_t)));
331 #endif
332   }
333
334   virtual void TearDown() { libvpx_test::ClearSystemState(); }
335
336   static void TearDownTestCase() {
337     vpx_free(input_ - 1);
338     input_ = NULL;
339     vpx_free(output_);
340     output_ = NULL;
341     vpx_free(output_ref_);
342     output_ref_ = NULL;
343 #if CONFIG_VP9_HIGHBITDEPTH
344     vpx_free(input16_ - 1);
345     input16_ = NULL;
346     vpx_free(output16_);
347     output16_ = NULL;
348     vpx_free(output16_ref_);
349     output16_ref_ = NULL;
350 #endif
351   }
352
353  protected:
354   static const int kDataAlignment = 16;
355   static const int kOuterBlockSize = 256;
356   static const int kInputStride = kOuterBlockSize;
357   static const int kOutputStride = kOuterBlockSize;
358   static const int kInputBufferSize = kOuterBlockSize * kOuterBlockSize;
359   static const int kOutputBufferSize = kOuterBlockSize * kOuterBlockSize;
360
361   int Width() const { return GET_PARAM(0); }
362   int Height() const { return GET_PARAM(1); }
363   int BorderLeft() const {
364     const int center = (kOuterBlockSize - Width()) / 2;
365     return (center + (kDataAlignment - 1)) & ~(kDataAlignment - 1);
366   }
367   int BorderTop() const { return (kOuterBlockSize - Height()) / 2; }
368
369   bool IsIndexInBorder(int i) {
370     return (i < BorderTop() * kOuterBlockSize ||
371             i >= (BorderTop() + Height()) * kOuterBlockSize ||
372             i % kOuterBlockSize < BorderLeft() ||
373             i % kOuterBlockSize >= (BorderLeft() + Width()));
374   }
375
376   virtual void SetUp() {
377     UUT_ = GET_PARAM(2);
378 #if CONFIG_VP9_HIGHBITDEPTH
379     if (UUT_->use_highbd_ != 0)
380       mask_ = (1 << UUT_->use_highbd_) - 1;
381     else
382       mask_ = 255;
383 #endif
384     /* Set up guard blocks for an inner block centered in the outer block */
385     for (int i = 0; i < kOutputBufferSize; ++i) {
386       if (IsIndexInBorder(i))
387         output_[i] = 255;
388       else
389         output_[i] = 0;
390     }
391
392     ::libvpx_test::ACMRandom prng;
393     for (int i = 0; i < kInputBufferSize; ++i) {
394       if (i & 1) {
395         input_[i] = 255;
396 #if CONFIG_VP9_HIGHBITDEPTH
397         input16_[i] = mask_;
398 #endif
399       } else {
400         input_[i] = prng.Rand8Extremes();
401 #if CONFIG_VP9_HIGHBITDEPTH
402         input16_[i] = prng.Rand16() & mask_;
403 #endif
404       }
405     }
406   }
407
408   void SetConstantInput(int value) {
409     memset(input_, value, kInputBufferSize);
410 #if CONFIG_VP9_HIGHBITDEPTH
411     vpx_memset16(input16_, value, kInputBufferSize);
412 #endif
413   }
414
415   void CopyOutputToRef() {
416     memcpy(output_ref_, output_, kOutputBufferSize);
417 #if CONFIG_VP9_HIGHBITDEPTH
418     memcpy(output16_ref_, output16_, kOutputBufferSize);
419 #endif
420   }
421
422   void CheckGuardBlocks() {
423     for (int i = 0; i < kOutputBufferSize; ++i) {
424       if (IsIndexInBorder(i))
425         EXPECT_EQ(255, output_[i]);
426     }
427   }
428
429   uint8_t *input() const {
430 #if CONFIG_VP9_HIGHBITDEPTH
431     if (UUT_->use_highbd_ == 0) {
432       return input_ + BorderTop() * kOuterBlockSize + BorderLeft();
433     } else {
434       return CONVERT_TO_BYTEPTR(input16_ + BorderTop() * kOuterBlockSize +
435                                 BorderLeft());
436     }
437 #else
438     return input_ + BorderTop() * kOuterBlockSize + BorderLeft();
439 #endif
440   }
441
442   uint8_t *output() const {
443 #if CONFIG_VP9_HIGHBITDEPTH
444     if (UUT_->use_highbd_ == 0) {
445       return output_ + BorderTop() * kOuterBlockSize + BorderLeft();
446     } else {
447       return CONVERT_TO_BYTEPTR(output16_ + BorderTop() * kOuterBlockSize +
448                                 BorderLeft());
449     }
450 #else
451     return output_ + BorderTop() * kOuterBlockSize + BorderLeft();
452 #endif
453   }
454
455   uint8_t *output_ref() const {
456 #if CONFIG_VP9_HIGHBITDEPTH
457     if (UUT_->use_highbd_ == 0) {
458       return output_ref_ + BorderTop() * kOuterBlockSize + BorderLeft();
459     } else {
460       return CONVERT_TO_BYTEPTR(output16_ref_ + BorderTop() * kOuterBlockSize +
461                                 BorderLeft());
462     }
463 #else
464     return output_ref_ + BorderTop() * kOuterBlockSize + BorderLeft();
465 #endif
466   }
467
468   uint16_t lookup(uint8_t *list, int index) const {
469 #if CONFIG_VP9_HIGHBITDEPTH
470     if (UUT_->use_highbd_ == 0) {
471       return list[index];
472     } else {
473       return CONVERT_TO_SHORTPTR(list)[index];
474     }
475 #else
476     return list[index];
477 #endif
478   }
479
480   void assign_val(uint8_t *list, int index, uint16_t val) const {
481 #if CONFIG_VP9_HIGHBITDEPTH
482     if (UUT_->use_highbd_ == 0) {
483       list[index] = (uint8_t) val;
484     } else {
485       CONVERT_TO_SHORTPTR(list)[index] = val;
486     }
487 #else
488     list[index] = (uint8_t) val;
489 #endif
490   }
491
492   void wrapper_filter_average_block2d_8_c(const uint8_t *src_ptr,
493                                           const unsigned int src_stride,
494                                           const int16_t *HFilter,
495                                           const int16_t *VFilter,
496                                           uint8_t *dst_ptr,
497                                           unsigned int dst_stride,
498                                           unsigned int output_width,
499                                           unsigned int output_height) {
500 #if CONFIG_VP9_HIGHBITDEPTH
501     if (UUT_->use_highbd_ == 0) {
502       filter_average_block2d_8_c(src_ptr, src_stride, HFilter, VFilter,
503                                  dst_ptr, dst_stride, output_width,
504                                  output_height);
505     } else {
506       highbd_filter_average_block2d_8_c(CONVERT_TO_SHORTPTR(src_ptr),
507                                         src_stride, HFilter, VFilter,
508                                         CONVERT_TO_SHORTPTR(dst_ptr),
509                                         dst_stride, output_width, output_height,
510                                         UUT_->use_highbd_);
511     }
512 #else
513     filter_average_block2d_8_c(src_ptr, src_stride, HFilter, VFilter,
514                                dst_ptr, dst_stride, output_width,
515                                output_height);
516 #endif
517   }
518
519   void wrapper_filter_block2d_8_c(const uint8_t *src_ptr,
520                                   const unsigned int src_stride,
521                                   const int16_t *HFilter,
522                                   const int16_t *VFilter,
523                                   uint8_t *dst_ptr,
524                                   unsigned int dst_stride,
525                                   unsigned int output_width,
526                                   unsigned int output_height) {
527 #if CONFIG_VP9_HIGHBITDEPTH
528     if (UUT_->use_highbd_ == 0) {
529       filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter,
530                          dst_ptr, dst_stride, output_width, output_height);
531     } else {
532       highbd_filter_block2d_8_c(CONVERT_TO_SHORTPTR(src_ptr), src_stride,
533                                 HFilter, VFilter,
534                                 CONVERT_TO_SHORTPTR(dst_ptr), dst_stride,
535                                 output_width, output_height, UUT_->use_highbd_);
536     }
537 #else
538     filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter,
539                        dst_ptr, dst_stride, output_width, output_height);
540 #endif
541   }
542
543   const ConvolveFunctions* UUT_;
544   static uint8_t* input_;
545   static uint8_t* output_;
546   static uint8_t* output_ref_;
547 #if CONFIG_VP9_HIGHBITDEPTH
548   static uint16_t* input16_;
549   static uint16_t* output16_;
550   static uint16_t* output16_ref_;
551   int mask_;
552 #endif
553 };
554
// Out-of-class definitions of the fixture's static buffer pointers. They start
// out NULL; SetUpTestCase() allocates them and TearDownTestCase() frees them
// and resets them to NULL.
uint8_t* ConvolveTest::input_ = NULL;
uint8_t* ConvolveTest::output_ = NULL;
uint8_t* ConvolveTest::output_ref_ = NULL;
#if CONFIG_VP9_HIGHBITDEPTH
uint16_t* ConvolveTest::input16_ = NULL;
uint16_t* ConvolveTest::output16_ = NULL;
uint16_t* ConvolveTest::output16_ref_ = NULL;
#endif
563
// Sanity check of the fixture itself: the guard region written by SetUp()
// must be intact before any convolve function has run.
TEST_P(ConvolveTest, GuardBlocks) {
  CheckGuardBlocks();
}
567
568 TEST_P(ConvolveTest, Copy) {
569   uint8_t* const in = input();
570   uint8_t* const out = output();
571
572   ASM_REGISTER_STATE_CHECK(
573       UUT_->copy_(in, kInputStride, out, kOutputStride, NULL, 0, NULL, 0,
574                   Width(), Height()));
575
576   CheckGuardBlocks();
577
578   for (int y = 0; y < Height(); ++y)
579     for (int x = 0; x < Width(); ++x)
580       ASSERT_EQ(lookup(out, y * kOutputStride + x),
581                 lookup(in, y * kInputStride + x))
582           << "(" << x << "," << y << ")";
583 }
584
585 TEST_P(ConvolveTest, Avg) {
586   uint8_t* const in = input();
587   uint8_t* const out = output();
588   uint8_t* const out_ref = output_ref();
589   CopyOutputToRef();
590
591   ASM_REGISTER_STATE_CHECK(
592       UUT_->avg_(in, kInputStride, out, kOutputStride, NULL, 0, NULL, 0,
593                 Width(), Height()));
594
595   CheckGuardBlocks();
596
597   for (int y = 0; y < Height(); ++y)
598     for (int x = 0; x < Width(); ++x)
599       ASSERT_EQ(lookup(out, y * kOutputStride + x),
600                 ROUND_POWER_OF_TWO(lookup(in, y * kInputStride + x) +
601                                    lookup(out_ref, y * kOutputStride + x), 1))
602           << "(" << x << "," << y << ")";
603 }
604
605 TEST_P(ConvolveTest, CopyHoriz) {
606   uint8_t* const in = input();
607   uint8_t* const out = output();
608   DECLARE_ALIGNED(256, const int16_t, filter8[8]) = {0, 0, 0, 128, 0, 0, 0, 0};
609
610   ASM_REGISTER_STATE_CHECK(
611       UUT_->sh8_(in, kInputStride, out, kOutputStride, filter8, 16, filter8, 16,
612                  Width(), Height()));
613
614   CheckGuardBlocks();
615
616   for (int y = 0; y < Height(); ++y)
617     for (int x = 0; x < Width(); ++x)
618       ASSERT_EQ(lookup(out, y * kOutputStride + x),
619                 lookup(in, y * kInputStride + x))
620           << "(" << x << "," << y << ")";
621 }
622
623 TEST_P(ConvolveTest, CopyVert) {
624   uint8_t* const in = input();
625   uint8_t* const out = output();
626   DECLARE_ALIGNED(256, const int16_t, filter8[8]) = {0, 0, 0, 128, 0, 0, 0, 0};
627
628   ASM_REGISTER_STATE_CHECK(
629       UUT_->sv8_(in, kInputStride, out, kOutputStride, filter8, 16, filter8, 16,
630                  Width(), Height()));
631
632   CheckGuardBlocks();
633
634   for (int y = 0; y < Height(); ++y)
635     for (int x = 0; x < Width(); ++x)
636       ASSERT_EQ(lookup(out, y * kOutputStride + x),
637                 lookup(in, y * kInputStride + x))
638           << "(" << x << "," << y << ")";
639 }
640
641 TEST_P(ConvolveTest, Copy2D) {
642   uint8_t* const in = input();
643   uint8_t* const out = output();
644   DECLARE_ALIGNED(256, const int16_t, filter8[8]) = {0, 0, 0, 128, 0, 0, 0, 0};
645
646   ASM_REGISTER_STATE_CHECK(
647       UUT_->shv8_(in, kInputStride, out, kOutputStride, filter8, 16, filter8,
648                   16, Width(), Height()));
649
650   CheckGuardBlocks();
651
652   for (int y = 0; y < Height(); ++y)
653     for (int x = 0; x < Width(); ++x)
654       ASSERT_EQ(lookup(out, y * kOutputStride + x),
655                 lookup(in, y * kInputStride + x))
656           << "(" << x << "," << y << ")";
657 }
658
// Iteration bounds used by the filter tests: the number of entries of
// vp9_filter_kernels that are visited, and the number of kernels visited in
// each of them.
const int kNumFilterBanks = 4;
const int kNumFilters = 16;
661
662 TEST(ConvolveTest, FiltersWontSaturateWhenAddedPairwise) {
663   for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
664     const InterpKernel *filters =
665         vp9_filter_kernels[static_cast<INTERP_FILTER>(filter_bank)];
666     for (int i = 0; i < kNumFilters; i++) {
667       const int p0 = filters[i][0] + filters[i][1];
668       const int p1 = filters[i][2] + filters[i][3];
669       const int p2 = filters[i][4] + filters[i][5];
670       const int p3 = filters[i][6] + filters[i][7];
671       EXPECT_LE(p0, 128);
672       EXPECT_LE(p1, 128);
673       EXPECT_LE(p2, 128);
674       EXPECT_LE(p3, 128);
675       EXPECT_LE(p0 + p3, 128);
676       EXPECT_LE(p0 + p3 + p1, 128);
677       EXPECT_LE(p0 + p3 + p1 + p2, 128);
678       EXPECT_EQ(p0 + p1 + p2 + p3, 128);
679     }
680   }
681 }
682
// All-zero kernel. The filter tests pass it for the direction a
// single-direction (or copy/avg) function is not expected to filter in.
// NOTE(review): presumably so that an implementation which wrongly applies it
// produces a visible mismatch -- confirm intent.
const int16_t kInvalidFilter[8] = { 0 };
684
685 TEST_P(ConvolveTest, MatchesReferenceSubpixelFilter) {
686   uint8_t* const in = input();
687   uint8_t* const out = output();
688 #if CONFIG_VP9_HIGHBITDEPTH
689   uint8_t ref8[kOutputStride * kMaxDimension];
690   uint16_t ref16[kOutputStride * kMaxDimension];
691   uint8_t* ref;
692   if (UUT_->use_highbd_ == 0) {
693     ref = ref8;
694   } else {
695     ref = CONVERT_TO_BYTEPTR(ref16);
696   }
697 #else
698   uint8_t ref[kOutputStride * kMaxDimension];
699 #endif
700
701   for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
702     const InterpKernel *filters =
703         vp9_filter_kernels[static_cast<INTERP_FILTER>(filter_bank)];
704
705     for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
706       for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
707         wrapper_filter_block2d_8_c(in, kInputStride,
708                                    filters[filter_x], filters[filter_y],
709                                    ref, kOutputStride,
710                                    Width(), Height());
711
712         if (filter_x && filter_y)
713           ASM_REGISTER_STATE_CHECK(
714               UUT_->hv8_(in, kInputStride, out, kOutputStride,
715                          filters[filter_x], 16, filters[filter_y], 16,
716                          Width(), Height()));
717         else if (filter_y)
718           ASM_REGISTER_STATE_CHECK(
719               UUT_->v8_(in, kInputStride, out, kOutputStride,
720                         kInvalidFilter, 16, filters[filter_y], 16,
721                         Width(), Height()));
722         else if (filter_x)
723           ASM_REGISTER_STATE_CHECK(
724               UUT_->h8_(in, kInputStride, out, kOutputStride,
725                         filters[filter_x], 16, kInvalidFilter, 16,
726                         Width(), Height()));
727         else
728           ASM_REGISTER_STATE_CHECK(
729               UUT_->copy_(in, kInputStride, out, kOutputStride,
730                           kInvalidFilter, 0, kInvalidFilter, 0,
731                           Width(), Height()));
732
733         CheckGuardBlocks();
734
735         for (int y = 0; y < Height(); ++y)
736           for (int x = 0; x < Width(); ++x)
737             ASSERT_EQ(lookup(ref, y * kOutputStride + x),
738                       lookup(out, y * kOutputStride + x))
739                 << "mismatch at (" << x << "," << y << "), "
740                 << "filters (" << filter_bank << ","
741                 << filter_x << "," << filter_y << ")";
742       }
743     }
744   }
745 }
746
747 TEST_P(ConvolveTest, MatchesReferenceAveragingSubpixelFilter) {
748   uint8_t* const in = input();
749   uint8_t* const out = output();
750 #if CONFIG_VP9_HIGHBITDEPTH
751   uint8_t ref8[kOutputStride * kMaxDimension];
752   uint16_t ref16[kOutputStride * kMaxDimension];
753   uint8_t* ref;
754   if (UUT_->use_highbd_ == 0) {
755     ref = ref8;
756   } else {
757     ref = CONVERT_TO_BYTEPTR(ref16);
758   }
759 #else
760   uint8_t ref[kOutputStride * kMaxDimension];
761 #endif
762
763   // Populate ref and out with some random data
764   ::libvpx_test::ACMRandom prng;
765   for (int y = 0; y < Height(); ++y) {
766     for (int x = 0; x < Width(); ++x) {
767       uint16_t r;
768 #if CONFIG_VP9_HIGHBITDEPTH
769       if (UUT_->use_highbd_ == 0 || UUT_->use_highbd_ == 8) {
770         r = prng.Rand8Extremes();
771       } else {
772         r = prng.Rand16() & mask_;
773       }
774 #else
775       r = prng.Rand8Extremes();
776 #endif
777
778       assign_val(out, y * kOutputStride + x, r);
779       assign_val(ref, y * kOutputStride + x, r);
780     }
781   }
782
783   for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
784     const InterpKernel *filters =
785         vp9_filter_kernels[static_cast<INTERP_FILTER>(filter_bank)];
786
787     for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
788       for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
789         wrapper_filter_average_block2d_8_c(in, kInputStride,
790                                            filters[filter_x], filters[filter_y],
791                                            ref, kOutputStride,
792                                            Width(), Height());
793
794         if (filter_x && filter_y)
795           ASM_REGISTER_STATE_CHECK(
796               UUT_->hv8_avg_(in, kInputStride, out, kOutputStride,
797                              filters[filter_x], 16, filters[filter_y], 16,
798                              Width(), Height()));
799         else if (filter_y)
800           ASM_REGISTER_STATE_CHECK(
801               UUT_->v8_avg_(in, kInputStride, out, kOutputStride,
802                             kInvalidFilter, 16, filters[filter_y], 16,
803                             Width(), Height()));
804         else if (filter_x)
805           ASM_REGISTER_STATE_CHECK(
806               UUT_->h8_avg_(in, kInputStride, out, kOutputStride,
807                             filters[filter_x], 16, kInvalidFilter, 16,
808                             Width(), Height()));
809         else
810           ASM_REGISTER_STATE_CHECK(
811               UUT_->avg_(in, kInputStride, out, kOutputStride,
812                           kInvalidFilter, 0, kInvalidFilter, 0,
813                           Width(), Height()));
814
815         CheckGuardBlocks();
816
817         for (int y = 0; y < Height(); ++y)
818           for (int x = 0; x < Width(); ++x)
819             ASSERT_EQ(lookup(ref, y * kOutputStride + x),
820                       lookup(out, y * kOutputStride + x))
821                 << "mismatch at (" << x << "," << y << "), "
822                 << "filters (" << filter_bank << ","
823                 << filter_x << "," << filter_y << ")";
824       }
825     }
826   }
827 }
828
829 TEST_P(ConvolveTest, FilterExtremes) {
830   uint8_t *const in = input();
831   uint8_t *const out = output();
832 #if CONFIG_VP9_HIGHBITDEPTH
833   uint8_t ref8[kOutputStride * kMaxDimension];
834   uint16_t ref16[kOutputStride * kMaxDimension];
835   uint8_t *ref;
836   if (UUT_->use_highbd_ == 0) {
837     ref = ref8;
838   } else {
839     ref = CONVERT_TO_BYTEPTR(ref16);
840   }
841 #else
842   uint8_t ref[kOutputStride * kMaxDimension];
843 #endif
844
845   // Populate ref and out with some random data
846   ::libvpx_test::ACMRandom prng;
847   for (int y = 0; y < Height(); ++y) {
848     for (int x = 0; x < Width(); ++x) {
849       uint16_t r;
850 #if CONFIG_VP9_HIGHBITDEPTH
851       if (UUT_->use_highbd_ == 0 || UUT_->use_highbd_ == 8) {
852         r = prng.Rand8Extremes();
853       } else {
854         r = prng.Rand16() & mask_;
855       }
856 #else
857       r = prng.Rand8Extremes();
858 #endif
859       assign_val(out, y * kOutputStride + x, r);
860       assign_val(ref, y * kOutputStride + x, r);
861     }
862   }
863
864   for (int axis = 0; axis < 2; axis++) {
865     int seed_val = 0;
866     while (seed_val < 256) {
867       for (int y = 0; y < 8; ++y) {
868         for (int x = 0; x < 8; ++x) {
869 #if CONFIG_VP9_HIGHBITDEPTH
870             assign_val(in, y * kOutputStride + x - SUBPEL_TAPS / 2 + 1,
871                        ((seed_val >> (axis ? y : x)) & 1) * mask_);
872 #else
873             assign_val(in, y * kOutputStride + x - SUBPEL_TAPS / 2 + 1,
874                        ((seed_val >> (axis ? y : x)) & 1) * 255);
875 #endif
876           if (axis) seed_val++;
877         }
878         if (axis)
879           seed_val-= 8;
880         else
881           seed_val++;
882       }
883       if (axis) seed_val += 8;
884
885       for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
886         const InterpKernel *filters =
887             vp9_filter_kernels[static_cast<INTERP_FILTER>(filter_bank)];
888         for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
889           for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
890             wrapper_filter_block2d_8_c(in, kInputStride,
891                                        filters[filter_x], filters[filter_y],
892                                        ref, kOutputStride,
893                                        Width(), Height());
894             if (filter_x && filter_y)
895               ASM_REGISTER_STATE_CHECK(
896                   UUT_->hv8_(in, kInputStride, out, kOutputStride,
897                              filters[filter_x], 16, filters[filter_y], 16,
898                              Width(), Height()));
899             else if (filter_y)
900               ASM_REGISTER_STATE_CHECK(
901                   UUT_->v8_(in, kInputStride, out, kOutputStride,
902                             kInvalidFilter, 16, filters[filter_y], 16,
903                             Width(), Height()));
904             else if (filter_x)
905               ASM_REGISTER_STATE_CHECK(
906                   UUT_->h8_(in, kInputStride, out, kOutputStride,
907                             filters[filter_x], 16, kInvalidFilter, 16,
908                             Width(), Height()));
909             else
910               ASM_REGISTER_STATE_CHECK(
911                   UUT_->copy_(in, kInputStride, out, kOutputStride,
912                               kInvalidFilter, 0, kInvalidFilter, 0,
913                               Width(), Height()));
914
915             for (int y = 0; y < Height(); ++y)
916               for (int x = 0; x < Width(); ++x)
917                 ASSERT_EQ(lookup(ref, y * kOutputStride + x),
918                           lookup(out, y * kOutputStride + x))
919                     << "mismatch at (" << x << "," << y << "), "
920                     << "filters (" << filter_bank << ","
921                     << filter_x << "," << filter_y << ")";
922           }
923         }
924       }
925     }
926   }
927 }
928
929 /* This test exercises that enough rows and columns are filtered with every
930    possible initial fractional positions and scaling steps. */
931 TEST_P(ConvolveTest, CheckScalingFiltering) {
932   uint8_t* const in = input();
933   uint8_t* const out = output();
934   const InterpKernel *const eighttap = vp9_filter_kernels[EIGHTTAP];
935
936   SetConstantInput(127);
937
938   for (int frac = 0; frac < 16; ++frac) {
939     for (int step = 1; step <= 32; ++step) {
940       /* Test the horizontal and vertical filters in combination. */
941       ASM_REGISTER_STATE_CHECK(UUT_->shv8_(in, kInputStride, out, kOutputStride,
942                                            eighttap[frac], step,
943                                            eighttap[frac], step,
944                                            Width(), Height()));
945
946       CheckGuardBlocks();
947
948       for (int y = 0; y < Height(); ++y) {
949         for (int x = 0; x < Width(); ++x) {
950           ASSERT_EQ(lookup(in, y * kInputStride + x),
951                     lookup(out, y * kOutputStride + x))
952               << "x == " << x << ", y == " << y
953               << ", frac == " << frac << ", step == " << step;
954         }
955       }
956     }
957   }
958 }
959
960 using std::tr1::make_tuple;
961
962 #if CONFIG_VP9_HIGHBITDEPTH
963 #if HAVE_SSE2 && ARCH_X86_64
964 void wrap_convolve8_horiz_sse2_8(const uint8_t *src, ptrdiff_t src_stride,
965                                  uint8_t *dst, ptrdiff_t dst_stride,
966                                  const int16_t *filter_x,
967                                  int filter_x_stride,
968                                  const int16_t *filter_y,
969                                  int filter_y_stride,
970                                  int w, int h) {
971   vpx_highbd_convolve8_horiz_sse2(src, src_stride, dst, dst_stride, filter_x,
972                                   filter_x_stride, filter_y, filter_y_stride,
973                                   w, h, 8);
974 }
975
976 void wrap_convolve8_avg_horiz_sse2_8(const uint8_t *src, ptrdiff_t src_stride,
977                                      uint8_t *dst, ptrdiff_t dst_stride,
978                                      const int16_t *filter_x,
979                                      int filter_x_stride,
980                                      const int16_t *filter_y,
981                                      int filter_y_stride,
982                                      int w, int h) {
983   vpx_highbd_convolve8_avg_horiz_sse2(src, src_stride, dst, dst_stride,
984                                       filter_x, filter_x_stride,
985                                       filter_y, filter_y_stride, w, h, 8);
986 }
987
988 void wrap_convolve8_vert_sse2_8(const uint8_t *src, ptrdiff_t src_stride,
989                                 uint8_t *dst, ptrdiff_t dst_stride,
990                                 const int16_t *filter_x,
991                                 int filter_x_stride,
992                                 const int16_t *filter_y,
993                                 int filter_y_stride,
994                                 int w, int h) {
995   vpx_highbd_convolve8_vert_sse2(src, src_stride, dst, dst_stride,
996                                  filter_x, filter_x_stride,
997                                  filter_y, filter_y_stride, w, h, 8);
998 }
999
1000 void wrap_convolve8_avg_vert_sse2_8(const uint8_t *src, ptrdiff_t src_stride,
1001                                     uint8_t *dst, ptrdiff_t dst_stride,
1002                                     const int16_t *filter_x,
1003                                     int filter_x_stride,
1004                                     const int16_t *filter_y,
1005                                     int filter_y_stride,
1006                                     int w, int h) {
1007   vpx_highbd_convolve8_avg_vert_sse2(src, src_stride, dst, dst_stride,
1008                                      filter_x, filter_x_stride,
1009                                      filter_y, filter_y_stride, w, h, 8);
1010 }
1011
1012 void wrap_convolve8_sse2_8(const uint8_t *src, ptrdiff_t src_stride,
1013                            uint8_t *dst, ptrdiff_t dst_stride,
1014                            const int16_t *filter_x,
1015                            int filter_x_stride,
1016                            const int16_t *filter_y,
1017                            int filter_y_stride,
1018                            int w, int h) {
1019   vpx_highbd_convolve8_sse2(src, src_stride, dst, dst_stride,
1020                             filter_x, filter_x_stride,
1021                             filter_y, filter_y_stride, w, h, 8);
1022 }
1023
1024 void wrap_convolve8_avg_sse2_8(const uint8_t *src, ptrdiff_t src_stride,
1025                                uint8_t *dst, ptrdiff_t dst_stride,
1026                                const int16_t *filter_x,
1027                                int filter_x_stride,
1028                                const int16_t *filter_y,
1029                                int filter_y_stride,
1030                                int w, int h) {
1031   vpx_highbd_convolve8_avg_sse2(src, src_stride, dst, dst_stride,
1032                                 filter_x, filter_x_stride,
1033                                 filter_y, filter_y_stride, w, h, 8);
1034 }
1035
1036 void wrap_convolve8_horiz_sse2_10(const uint8_t *src, ptrdiff_t src_stride,
1037                                   uint8_t *dst, ptrdiff_t dst_stride,
1038                                   const int16_t *filter_x,
1039                                   int filter_x_stride,
1040                                   const int16_t *filter_y,
1041                                   int filter_y_stride,
1042                                   int w, int h) {
1043   vpx_highbd_convolve8_horiz_sse2(src, src_stride, dst, dst_stride,
1044                                   filter_x, filter_x_stride,
1045                                   filter_y, filter_y_stride, w, h, 10);
1046 }
1047
1048 void wrap_convolve8_avg_horiz_sse2_10(const uint8_t *src, ptrdiff_t src_stride,
1049                                       uint8_t *dst, ptrdiff_t dst_stride,
1050                                       const int16_t *filter_x,
1051                                       int filter_x_stride,
1052                                       const int16_t *filter_y,
1053                                       int filter_y_stride,
1054                                       int w, int h) {
1055   vpx_highbd_convolve8_avg_horiz_sse2(src, src_stride, dst, dst_stride,
1056                                       filter_x, filter_x_stride,
1057                                       filter_y, filter_y_stride, w, h, 10);
1058 }
1059
1060 void wrap_convolve8_vert_sse2_10(const uint8_t *src, ptrdiff_t src_stride,
1061                                  uint8_t *dst, ptrdiff_t dst_stride,
1062                                  const int16_t *filter_x,
1063                                  int filter_x_stride,
1064                                  const int16_t *filter_y,
1065                                  int filter_y_stride,
1066                                  int w, int h) {
1067   vpx_highbd_convolve8_vert_sse2(src, src_stride, dst, dst_stride,
1068                                  filter_x, filter_x_stride,
1069                                  filter_y, filter_y_stride, w, h, 10);
1070 }
1071
1072 void wrap_convolve8_avg_vert_sse2_10(const uint8_t *src, ptrdiff_t src_stride,
1073                                      uint8_t *dst, ptrdiff_t dst_stride,
1074                                      const int16_t *filter_x,
1075                                      int filter_x_stride,
1076                                      const int16_t *filter_y,
1077                                      int filter_y_stride,
1078                                      int w, int h) {
1079   vpx_highbd_convolve8_avg_vert_sse2(src, src_stride, dst, dst_stride,
1080                                      filter_x, filter_x_stride,
1081                                      filter_y, filter_y_stride, w, h, 10);
1082 }
1083
1084 void wrap_convolve8_sse2_10(const uint8_t *src, ptrdiff_t src_stride,
1085                             uint8_t *dst, ptrdiff_t dst_stride,
1086                             const int16_t *filter_x,
1087                             int filter_x_stride,
1088                             const int16_t *filter_y,
1089                             int filter_y_stride,
1090                             int w, int h) {
1091   vpx_highbd_convolve8_sse2(src, src_stride, dst, dst_stride,
1092                             filter_x, filter_x_stride,
1093                             filter_y, filter_y_stride, w, h, 10);
1094 }
1095
1096 void wrap_convolve8_avg_sse2_10(const uint8_t *src, ptrdiff_t src_stride,
1097                                 uint8_t *dst, ptrdiff_t dst_stride,
1098                                 const int16_t *filter_x,
1099                                 int filter_x_stride,
1100                                 const int16_t *filter_y,
1101                                 int filter_y_stride,
1102                                 int w, int h) {
1103   vpx_highbd_convolve8_avg_sse2(src, src_stride, dst, dst_stride,
1104                                 filter_x, filter_x_stride,
1105                                 filter_y, filter_y_stride, w, h, 10);
1106 }
1107
1108 void wrap_convolve8_horiz_sse2_12(const uint8_t *src, ptrdiff_t src_stride,
1109                                   uint8_t *dst, ptrdiff_t dst_stride,
1110                                   const int16_t *filter_x,
1111                                   int filter_x_stride,
1112                                   const int16_t *filter_y,
1113                                   int filter_y_stride,
1114                                   int w, int h) {
1115   vpx_highbd_convolve8_horiz_sse2(src, src_stride, dst, dst_stride,
1116                                   filter_x, filter_x_stride,
1117                                   filter_y, filter_y_stride, w, h, 12);
1118 }
1119
1120 void wrap_convolve8_avg_horiz_sse2_12(const uint8_t *src, ptrdiff_t src_stride,
1121                                       uint8_t *dst, ptrdiff_t dst_stride,
1122                                       const int16_t *filter_x,
1123                                       int filter_x_stride,
1124                                       const int16_t *filter_y,
1125                                       int filter_y_stride,
1126                                       int w, int h) {
1127   vpx_highbd_convolve8_avg_horiz_sse2(src, src_stride, dst, dst_stride,
1128                                       filter_x, filter_x_stride,
1129                                       filter_y, filter_y_stride, w, h, 12);
1130 }
1131
1132 void wrap_convolve8_vert_sse2_12(const uint8_t *src, ptrdiff_t src_stride,
1133                                  uint8_t *dst, ptrdiff_t dst_stride,
1134                                  const int16_t *filter_x,
1135                                  int filter_x_stride,
1136                                  const int16_t *filter_y,
1137                                  int filter_y_stride,
1138                                  int w, int h) {
1139   vpx_highbd_convolve8_vert_sse2(src, src_stride, dst, dst_stride,
1140                                  filter_x, filter_x_stride,
1141                                  filter_y, filter_y_stride, w, h, 12);
1142 }
1143
1144 void wrap_convolve8_avg_vert_sse2_12(const uint8_t *src, ptrdiff_t src_stride,
1145                                      uint8_t *dst, ptrdiff_t dst_stride,
1146                                      const int16_t *filter_x,
1147                                      int filter_x_stride,
1148                                      const int16_t *filter_y,
1149                                      int filter_y_stride,
1150                                      int w, int h) {
1151   vpx_highbd_convolve8_avg_vert_sse2(src, src_stride, dst, dst_stride,
1152                                      filter_x, filter_x_stride,
1153                                      filter_y, filter_y_stride, w, h, 12);
1154 }
1155
1156 void wrap_convolve8_sse2_12(const uint8_t *src, ptrdiff_t src_stride,
1157                             uint8_t *dst, ptrdiff_t dst_stride,
1158                             const int16_t *filter_x,
1159                             int filter_x_stride,
1160                             const int16_t *filter_y,
1161                             int filter_y_stride,
1162                             int w, int h) {
1163   vpx_highbd_convolve8_sse2(src, src_stride, dst, dst_stride,
1164                             filter_x, filter_x_stride,
1165                             filter_y, filter_y_stride, w, h, 12);
1166 }
1167
1168 void wrap_convolve8_avg_sse2_12(const uint8_t *src, ptrdiff_t src_stride,
1169                                 uint8_t *dst, ptrdiff_t dst_stride,
1170                                 const int16_t *filter_x,
1171                                 int filter_x_stride,
1172                                 const int16_t *filter_y,
1173                                 int filter_y_stride,
1174                                 int w, int h) {
1175   vpx_highbd_convolve8_avg_sse2(src, src_stride, dst, dst_stride,
1176                                 filter_x, filter_x_stride,
1177                                 filter_y, filter_y_stride, w, h, 12);
1178 }
1179 #endif  // HAVE_SSE2 && ARCH_X86_64
1180
1181 void wrap_convolve_copy_c_8(const uint8_t *src, ptrdiff_t src_stride,
1182                             uint8_t *dst, ptrdiff_t dst_stride,
1183                             const int16_t *filter_x,
1184                             int filter_x_stride,
1185                             const int16_t *filter_y,
1186                             int filter_y_stride,
1187                             int w, int h) {
1188   vpx_highbd_convolve_copy_c(src, src_stride, dst, dst_stride,
1189                              filter_x, filter_x_stride,
1190                              filter_y, filter_y_stride, w, h, 8);
1191 }
1192
1193 void wrap_convolve_avg_c_8(const uint8_t *src, ptrdiff_t src_stride,
1194                            uint8_t *dst, ptrdiff_t dst_stride,
1195                            const int16_t *filter_x,
1196                            int filter_x_stride,
1197                            const int16_t *filter_y,
1198                            int filter_y_stride,
1199                            int w, int h) {
1200   vpx_highbd_convolve_avg_c(src, src_stride, dst, dst_stride,
1201                             filter_x, filter_x_stride,
1202                             filter_y, filter_y_stride, w, h, 8);
1203 }
1204
1205 void wrap_convolve8_horiz_c_8(const uint8_t *src, ptrdiff_t src_stride,
1206                               uint8_t *dst, ptrdiff_t dst_stride,
1207                               const int16_t *filter_x,
1208                               int filter_x_stride,
1209                               const int16_t *filter_y,
1210                               int filter_y_stride,
1211                               int w, int h) {
1212   vpx_highbd_convolve8_horiz_c(src, src_stride, dst, dst_stride,
1213                                filter_x, filter_x_stride,
1214                                filter_y, filter_y_stride, w, h, 8);
1215 }
1216
1217 void wrap_convolve8_avg_horiz_c_8(const uint8_t *src, ptrdiff_t src_stride,
1218                                   uint8_t *dst, ptrdiff_t dst_stride,
1219                                   const int16_t *filter_x,
1220                                   int filter_x_stride,
1221                                   const int16_t *filter_y,
1222                                   int filter_y_stride,
1223                                   int w, int h) {
1224   vpx_highbd_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride,
1225                                    filter_x, filter_x_stride,
1226                                    filter_y, filter_y_stride, w, h, 8);
1227 }
1228
1229 void wrap_convolve8_vert_c_8(const uint8_t *src, ptrdiff_t src_stride,
1230                              uint8_t *dst, ptrdiff_t dst_stride,
1231                              const int16_t *filter_x,
1232                              int filter_x_stride,
1233                              const int16_t *filter_y,
1234                              int filter_y_stride,
1235                              int w, int h) {
1236   vpx_highbd_convolve8_vert_c(src, src_stride, dst, dst_stride,
1237                               filter_x, filter_x_stride,
1238                               filter_y, filter_y_stride, w, h, 8);
1239 }
1240
1241 void wrap_convolve8_avg_vert_c_8(const uint8_t *src, ptrdiff_t src_stride,
1242                                  uint8_t *dst, ptrdiff_t dst_stride,
1243                                  const int16_t *filter_x,
1244                                  int filter_x_stride,
1245                                  const int16_t *filter_y,
1246                                  int filter_y_stride,
1247                                  int w, int h) {
1248   vpx_highbd_convolve8_avg_vert_c(src, src_stride, dst, dst_stride,
1249                                   filter_x, filter_x_stride,
1250                                   filter_y, filter_y_stride, w, h, 8);
1251 }
1252
1253 void wrap_convolve8_c_8(const uint8_t *src, ptrdiff_t src_stride,
1254                         uint8_t *dst, ptrdiff_t dst_stride,
1255                         const int16_t *filter_x,
1256                         int filter_x_stride,
1257                         const int16_t *filter_y,
1258                         int filter_y_stride,
1259                         int w, int h) {
1260   vpx_highbd_convolve8_c(src, src_stride, dst, dst_stride,
1261                          filter_x, filter_x_stride,
1262                          filter_y, filter_y_stride, w, h, 8);
1263 }
1264
1265 void wrap_convolve8_avg_c_8(const uint8_t *src, ptrdiff_t src_stride,
1266                             uint8_t *dst, ptrdiff_t dst_stride,
1267                             const int16_t *filter_x,
1268                             int filter_x_stride,
1269                             const int16_t *filter_y,
1270                             int filter_y_stride,
1271                             int w, int h) {
1272   vpx_highbd_convolve8_avg_c(src, src_stride, dst, dst_stride,
1273                              filter_x, filter_x_stride,
1274                              filter_y, filter_y_stride, w, h, 8);
1275 }
1276
1277 void wrap_convolve_copy_c_10(const uint8_t *src, ptrdiff_t src_stride,
1278                              uint8_t *dst, ptrdiff_t dst_stride,
1279                              const int16_t *filter_x,
1280                              int filter_x_stride,
1281                              const int16_t *filter_y,
1282                              int filter_y_stride,
1283                              int w, int h) {
1284   vpx_highbd_convolve_copy_c(src, src_stride, dst, dst_stride,
1285                              filter_x, filter_x_stride,
1286                              filter_y, filter_y_stride, w, h, 10);
1287 }
1288
1289 void wrap_convolve_avg_c_10(const uint8_t *src, ptrdiff_t src_stride,
1290                             uint8_t *dst, ptrdiff_t dst_stride,
1291                             const int16_t *filter_x,
1292                             int filter_x_stride,
1293                             const int16_t *filter_y,
1294                             int filter_y_stride,
1295                             int w, int h) {
1296   vpx_highbd_convolve_avg_c(src, src_stride, dst, dst_stride,
1297                             filter_x, filter_x_stride,
1298                             filter_y, filter_y_stride, w, h, 10);
1299 }
1300
1301 void wrap_convolve8_horiz_c_10(const uint8_t *src, ptrdiff_t src_stride,
1302                                uint8_t *dst, ptrdiff_t dst_stride,
1303                                const int16_t *filter_x,
1304                                int filter_x_stride,
1305                                const int16_t *filter_y,
1306                                int filter_y_stride,
1307                                int w, int h) {
1308   vpx_highbd_convolve8_horiz_c(src, src_stride, dst, dst_stride,
1309                                filter_x, filter_x_stride,
1310                                filter_y, filter_y_stride, w, h, 10);
1311 }
1312
1313 void wrap_convolve8_avg_horiz_c_10(const uint8_t *src, ptrdiff_t src_stride,
1314                                    uint8_t *dst, ptrdiff_t dst_stride,
1315                                    const int16_t *filter_x,
1316                                    int filter_x_stride,
1317                                    const int16_t *filter_y,
1318                                    int filter_y_stride,
1319                                    int w, int h) {
1320   vpx_highbd_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride,
1321                                    filter_x, filter_x_stride,
1322                                    filter_y, filter_y_stride, w, h, 10);
1323 }
1324
1325 void wrap_convolve8_vert_c_10(const uint8_t *src, ptrdiff_t src_stride,
1326                               uint8_t *dst, ptrdiff_t dst_stride,
1327                               const int16_t *filter_x,
1328                               int filter_x_stride,
1329                               const int16_t *filter_y,
1330                               int filter_y_stride,
1331                               int w, int h) {
1332   vpx_highbd_convolve8_vert_c(src, src_stride, dst, dst_stride,
1333                               filter_x, filter_x_stride,
1334                               filter_y, filter_y_stride, w, h, 10);
1335 }
1336
1337 void wrap_convolve8_avg_vert_c_10(const uint8_t *src, ptrdiff_t src_stride,
1338                                   uint8_t *dst, ptrdiff_t dst_stride,
1339                                   const int16_t *filter_x,
1340                                   int filter_x_stride,
1341                                   const int16_t *filter_y,
1342                                   int filter_y_stride,
1343                                   int w, int h) {
1344   vpx_highbd_convolve8_avg_vert_c(src, src_stride, dst, dst_stride,
1345                                   filter_x, filter_x_stride,
1346                                   filter_y, filter_y_stride, w, h, 10);
1347 }
1348
1349 void wrap_convolve8_c_10(const uint8_t *src, ptrdiff_t src_stride,
1350                          uint8_t *dst, ptrdiff_t dst_stride,
1351                          const int16_t *filter_x,
1352                          int filter_x_stride,
1353                          const int16_t *filter_y,
1354                          int filter_y_stride,
1355                          int w, int h) {
1356   vpx_highbd_convolve8_c(src, src_stride, dst, dst_stride,
1357                          filter_x, filter_x_stride,
1358                          filter_y, filter_y_stride, w, h, 10);
1359 }
1360
1361 void wrap_convolve8_avg_c_10(const uint8_t *src, ptrdiff_t src_stride,
1362                              uint8_t *dst, ptrdiff_t dst_stride,
1363                              const int16_t *filter_x,
1364                              int filter_x_stride,
1365                              const int16_t *filter_y,
1366                              int filter_y_stride,
1367                              int w, int h) {
1368   vpx_highbd_convolve8_avg_c(src, src_stride, dst, dst_stride,
1369                              filter_x, filter_x_stride,
1370                              filter_y, filter_y_stride, w, h, 10);
1371 }
1372
1373 void wrap_convolve_copy_c_12(const uint8_t *src, ptrdiff_t src_stride,
1374                              uint8_t *dst, ptrdiff_t dst_stride,
1375                              const int16_t *filter_x,
1376                              int filter_x_stride,
1377                              const int16_t *filter_y,
1378                              int filter_y_stride,
1379                              int w, int h) {
1380   vpx_highbd_convolve_copy_c(src, src_stride, dst, dst_stride,
1381                              filter_x, filter_x_stride,
1382                              filter_y, filter_y_stride, w, h, 12);
1383 }
1384
1385 void wrap_convolve_avg_c_12(const uint8_t *src, ptrdiff_t src_stride,
1386                             uint8_t *dst, ptrdiff_t dst_stride,
1387                             const int16_t *filter_x,
1388                             int filter_x_stride,
1389                             const int16_t *filter_y,
1390                             int filter_y_stride,
1391                             int w, int h) {
1392   vpx_highbd_convolve_avg_c(src, src_stride, dst, dst_stride,
1393                             filter_x, filter_x_stride,
1394                             filter_y, filter_y_stride, w, h, 12);
1395 }
1396
1397 void wrap_convolve8_horiz_c_12(const uint8_t *src, ptrdiff_t src_stride,
1398                                uint8_t *dst, ptrdiff_t dst_stride,
1399                                const int16_t *filter_x,
1400                                int filter_x_stride,
1401                                const int16_t *filter_y,
1402                                int filter_y_stride,
1403                                int w, int h) {
1404   vpx_highbd_convolve8_horiz_c(src, src_stride, dst, dst_stride,
1405                                filter_x, filter_x_stride,
1406                                filter_y, filter_y_stride, w, h, 12);
1407 }
1408
1409 void wrap_convolve8_avg_horiz_c_12(const uint8_t *src, ptrdiff_t src_stride,
1410                                    uint8_t *dst, ptrdiff_t dst_stride,
1411                                    const int16_t *filter_x,
1412                                    int filter_x_stride,
1413                                    const int16_t *filter_y,
1414                                    int filter_y_stride,
1415                                    int w, int h) {
1416   vpx_highbd_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride,
1417                                    filter_x, filter_x_stride,
1418                                    filter_y, filter_y_stride, w, h, 12);
1419 }
1420
// Adapter with the ConvolveFunc parameter list: forwards every argument
// unchanged to vpx_highbd_convolve8_vert_c and appends a fixed trailing
// argument of 12 (presumably the bit depth, going by the _12 suffix --
// confirm against the vpx_highbd_* prototype).
1421 void wrap_convolve8_vert_c_12(const uint8_t *src, ptrdiff_t src_stride,
1422                               uint8_t *dst, ptrdiff_t dst_stride,
1423                               const int16_t *filter_x,
1424                               int filter_x_stride,
1425                               const int16_t *filter_y,
1426                               int filter_y_stride,
1427                               int w, int h) {
1428   vpx_highbd_convolve8_vert_c(src, src_stride, dst, dst_stride,
1429                               filter_x, filter_x_stride,
1430                               filter_y, filter_y_stride, w, h, 12);
1431 }
1432
// Adapter with the ConvolveFunc parameter list: forwards every argument
// unchanged to vpx_highbd_convolve8_avg_vert_c and appends a fixed trailing
// argument of 12 (presumably the bit depth, going by the _12 suffix --
// confirm against the vpx_highbd_* prototype).
1433 void wrap_convolve8_avg_vert_c_12(const uint8_t *src, ptrdiff_t src_stride,
1434                                   uint8_t *dst, ptrdiff_t dst_stride,
1435                                   const int16_t *filter_x,
1436                                   int filter_x_stride,
1437                                   const int16_t *filter_y,
1438                                   int filter_y_stride,
1439                                   int w, int h) {
1440   vpx_highbd_convolve8_avg_vert_c(src, src_stride, dst, dst_stride,
1441                                   filter_x, filter_x_stride,
1442                                   filter_y, filter_y_stride, w, h, 12);
1443 }
1444
// Adapter with the ConvolveFunc parameter list: forwards every argument
// unchanged to vpx_highbd_convolve8_c and appends a fixed trailing argument
// of 12 (presumably the bit depth, going by the _12 suffix -- confirm
// against the vpx_highbd_* prototype).
1445 void wrap_convolve8_c_12(const uint8_t *src, ptrdiff_t src_stride,
1446                          uint8_t *dst, ptrdiff_t dst_stride,
1447                          const int16_t *filter_x,
1448                          int filter_x_stride,
1449                          const int16_t *filter_y,
1450                          int filter_y_stride,
1451                          int w, int h) {
1452   vpx_highbd_convolve8_c(src, src_stride, dst, dst_stride,
1453                          filter_x, filter_x_stride,
1454                          filter_y, filter_y_stride, w, h, 12);
1455 }
1456
// Adapter with the ConvolveFunc parameter list: forwards every argument
// unchanged to vpx_highbd_convolve8_avg_c and appends a fixed trailing
// argument of 12 (presumably the bit depth, going by the _12 suffix --
// confirm against the vpx_highbd_* prototype).
1457 void wrap_convolve8_avg_c_12(const uint8_t *src, ptrdiff_t src_stride,
1458                              uint8_t *dst, ptrdiff_t dst_stride,
1459                              const int16_t *filter_x,
1460                              int filter_x_stride,
1461                              const int16_t *filter_y,
1462                              int filter_y_stride,
1463                              int w, int h) {
1464   vpx_highbd_convolve8_avg_c(src, src_stride, dst, dst_stride,
1465                              filter_x, filter_x_stride,
1466                              filter_y, filter_y_stride, w, h, 12);
1467 }
1468
// Function table built from the *_c_8 wrappers. Constructor slots, in order:
// copy, avg, h8, h8_avg, v8, v8_avg, hv8, hv8_avg, then the six scaled slots
// (sh8, sh8_avg, sv8, sv8_avg, shv8, shv8_avg), which here reuse the same
// wrappers as the unscaled slots. The trailing 8 is the constructor's final
// argument (presumably the bit depth; that parameter is declared outside
// this view).
1469 const ConvolveFunctions convolve8_c(
1470     wrap_convolve_copy_c_8, wrap_convolve_avg_c_8,
1471     wrap_convolve8_horiz_c_8, wrap_convolve8_avg_horiz_c_8,
1472     wrap_convolve8_vert_c_8, wrap_convolve8_avg_vert_c_8,
1473     wrap_convolve8_c_8, wrap_convolve8_avg_c_8,
1474     wrap_convolve8_horiz_c_8, wrap_convolve8_avg_horiz_c_8,
1475     wrap_convolve8_vert_c_8, wrap_convolve8_avg_vert_c_8,
1476     wrap_convolve8_c_8, wrap_convolve8_avg_c_8, 8);
// Instantiates ConvolveTest under the C_8 prefix, once per tuple below: 13
// size pairs from 4x4 up to 64x64 (presumably block width and height), each
// paired with a pointer to the convolve8_c function table.
1477 INSTANTIATE_TEST_CASE_P(C_8, ConvolveTest, ::testing::Values(
1478     make_tuple(4, 4, &convolve8_c),
1479     make_tuple(8, 4, &convolve8_c),
1480     make_tuple(4, 8, &convolve8_c),
1481     make_tuple(8, 8, &convolve8_c),
1482     make_tuple(16, 8, &convolve8_c),
1483     make_tuple(8, 16, &convolve8_c),
1484     make_tuple(16, 16, &convolve8_c),
1485     make_tuple(32, 16, &convolve8_c),
1486     make_tuple(16, 32, &convolve8_c),
1487     make_tuple(32, 32, &convolve8_c),
1488     make_tuple(64, 32, &convolve8_c),
1489     make_tuple(32, 64, &convolve8_c),
1490     make_tuple(64, 64, &convolve8_c)));
// Function table built from the *_c_10 wrappers. Constructor slots, in order:
// copy, avg, h8, h8_avg, v8, v8_avg, hv8, hv8_avg, then the six scaled slots
// (sh8, sh8_avg, sv8, sv8_avg, shv8, shv8_avg), which here reuse the same
// wrappers as the unscaled slots. The trailing 10 is the constructor's final
// argument (presumably the bit depth; that parameter is declared outside
// this view).
1491 const ConvolveFunctions convolve10_c(
1492     wrap_convolve_copy_c_10, wrap_convolve_avg_c_10,
1493     wrap_convolve8_horiz_c_10, wrap_convolve8_avg_horiz_c_10,
1494     wrap_convolve8_vert_c_10, wrap_convolve8_avg_vert_c_10,
1495     wrap_convolve8_c_10, wrap_convolve8_avg_c_10,
1496     wrap_convolve8_horiz_c_10, wrap_convolve8_avg_horiz_c_10,
1497     wrap_convolve8_vert_c_10, wrap_convolve8_avg_vert_c_10,
1498     wrap_convolve8_c_10, wrap_convolve8_avg_c_10, 10);
// Instantiates ConvolveTest under the C_10 prefix, once per tuple below: 13
// size pairs from 4x4 up to 64x64 (presumably block width and height), each
// paired with a pointer to the convolve10_c function table.
1499 INSTANTIATE_TEST_CASE_P(C_10, ConvolveTest, ::testing::Values(
1500     make_tuple(4, 4, &convolve10_c),
1501     make_tuple(8, 4, &convolve10_c),
1502     make_tuple(4, 8, &convolve10_c),
1503     make_tuple(8, 8, &convolve10_c),
1504     make_tuple(16, 8, &convolve10_c),
1505     make_tuple(8, 16, &convolve10_c),
1506     make_tuple(16, 16, &convolve10_c),
1507     make_tuple(32, 16, &convolve10_c),
1508     make_tuple(16, 32, &convolve10_c),
1509     make_tuple(32, 32, &convolve10_c),
1510     make_tuple(64, 32, &convolve10_c),
1511     make_tuple(32, 64, &convolve10_c),
1512     make_tuple(64, 64, &convolve10_c)));
// Function table built from the *_c_12 wrappers. Constructor slots, in order:
// copy, avg, h8, h8_avg, v8, v8_avg, hv8, hv8_avg, then the six scaled slots
// (sh8, sh8_avg, sv8, sv8_avg, shv8, shv8_avg), which here reuse the same
// wrappers as the unscaled slots. The trailing 12 is the constructor's final
// argument (presumably the bit depth; that parameter is declared outside
// this view).
1513 const ConvolveFunctions convolve12_c(
1514     wrap_convolve_copy_c_12, wrap_convolve_avg_c_12,
1515     wrap_convolve8_horiz_c_12, wrap_convolve8_avg_horiz_c_12,
1516     wrap_convolve8_vert_c_12, wrap_convolve8_avg_vert_c_12,
1517     wrap_convolve8_c_12, wrap_convolve8_avg_c_12,
1518     wrap_convolve8_horiz_c_12, wrap_convolve8_avg_horiz_c_12,
1519     wrap_convolve8_vert_c_12, wrap_convolve8_avg_vert_c_12,
1520     wrap_convolve8_c_12, wrap_convolve8_avg_c_12, 12);
// Instantiates ConvolveTest under the C_12 prefix, once per tuple below: 13
// size pairs from 4x4 up to 64x64 (presumably block width and height), each
// paired with a pointer to the convolve12_c function table.
1521 INSTANTIATE_TEST_CASE_P(C_12, ConvolveTest, ::testing::Values(
1522     make_tuple(4, 4, &convolve12_c),
1523     make_tuple(8, 4, &convolve12_c),
1524     make_tuple(4, 8, &convolve12_c),
1525     make_tuple(8, 8, &convolve12_c),
1526     make_tuple(16, 8, &convolve12_c),
1527     make_tuple(8, 16, &convolve12_c),
1528     make_tuple(16, 16, &convolve12_c),
1529     make_tuple(32, 16, &convolve12_c),
1530     make_tuple(16, 32, &convolve12_c),
1531     make_tuple(32, 32, &convolve12_c),
1532     make_tuple(64, 32, &convolve12_c),
1533     make_tuple(32, 64, &convolve12_c),
1534     make_tuple(64, 64, &convolve12_c)));
1535
1536 #else
1537
// Function table for the #else configuration (the matching #if is outside
// this view; presumably CONFIG_VP9_HIGHBITDEPTH). The vpx_*_c functions are
// passed directly, with no wrappers. Constructor slots, in order: copy, avg,
// h8, h8_avg, v8, v8_avg, hv8, hv8_avg, then the six scaled slots, filled
// with the vpx_scaled_*_c routines. The final constructor argument is 0.
1538 const ConvolveFunctions convolve8_c(
1539     vpx_convolve_copy_c, vpx_convolve_avg_c,
1540     vpx_convolve8_horiz_c, vpx_convolve8_avg_horiz_c,
1541     vpx_convolve8_vert_c, vpx_convolve8_avg_vert_c,
1542     vpx_convolve8_c, vpx_convolve8_avg_c,
1543     vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c,
1544     vpx_scaled_vert_c, vpx_scaled_avg_vert_c,
1545     vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
1546
// Instantiates ConvolveTest under the C prefix, once per tuple below: 13
// size pairs from 4x4 up to 64x64 (presumably block width and height), each
// paired with a pointer to the convolve8_c function table.
1547 INSTANTIATE_TEST_CASE_P(C, ConvolveTest, ::testing::Values(
1548     make_tuple(4, 4, &convolve8_c),
1549     make_tuple(8, 4, &convolve8_c),
1550     make_tuple(4, 8, &convolve8_c),
1551     make_tuple(8, 8, &convolve8_c),
1552     make_tuple(16, 8, &convolve8_c),
1553     make_tuple(8, 16, &convolve8_c),
1554     make_tuple(16, 16, &convolve8_c),
1555     make_tuple(32, 16, &convolve8_c),
1556     make_tuple(16, 32, &convolve8_c),
1557     make_tuple(32, 32, &convolve8_c),
1558     make_tuple(64, 32, &convolve8_c),
1559     make_tuple(32, 64, &convolve8_c),
1560     make_tuple(64, 64, &convolve8_c)));
1561 #endif
1562
1563 #if HAVE_SSE2 && ARCH_X86_64
1564 #if CONFIG_VP9_HIGHBITDEPTH
// SSE2 function table for the CONFIG_VP9_HIGHBITDEPTH build. The copy and
// avg slots use the C wrappers (wrap_convolve_*_c_8); the 8-tap slots use the
// *_sse2_8 wrappers, and the six scaled slots repeat those same SSE2
// wrappers. The trailing 8 is the constructor's final argument (presumably
// the bit depth; that parameter is declared outside this view).
1565 const ConvolveFunctions convolve8_sse2(
1566     wrap_convolve_copy_c_8, wrap_convolve_avg_c_8,
1567     wrap_convolve8_horiz_sse2_8, wrap_convolve8_avg_horiz_sse2_8,
1568     wrap_convolve8_vert_sse2_8, wrap_convolve8_avg_vert_sse2_8,
1569     wrap_convolve8_sse2_8, wrap_convolve8_avg_sse2_8,
1570     wrap_convolve8_horiz_sse2_8, wrap_convolve8_avg_horiz_sse2_8,
1571     wrap_convolve8_vert_sse2_8, wrap_convolve8_avg_vert_sse2_8,
1572     wrap_convolve8_sse2_8, wrap_convolve8_avg_sse2_8, 8);
// SSE2 function table built from the *_10 wrappers. The copy and avg slots
// use the C wrappers (wrap_convolve_*_c_10); the 8-tap slots use the
// *_sse2_10 wrappers, and the six scaled slots repeat those same SSE2
// wrappers. The trailing 10 is the constructor's final argument (presumably
// the bit depth; that parameter is declared outside this view).
1573 const ConvolveFunctions convolve10_sse2(
1574     wrap_convolve_copy_c_10, wrap_convolve_avg_c_10,
1575     wrap_convolve8_horiz_sse2_10, wrap_convolve8_avg_horiz_sse2_10,
1576     wrap_convolve8_vert_sse2_10, wrap_convolve8_avg_vert_sse2_10,
1577     wrap_convolve8_sse2_10, wrap_convolve8_avg_sse2_10,
1578     wrap_convolve8_horiz_sse2_10, wrap_convolve8_avg_horiz_sse2_10,
1579     wrap_convolve8_vert_sse2_10, wrap_convolve8_avg_vert_sse2_10,
1580     wrap_convolve8_sse2_10, wrap_convolve8_avg_sse2_10, 10);
// SSE2 function table built from the *_12 wrappers. The copy and avg slots
// use the C wrappers (wrap_convolve_*_c_12); the 8-tap slots use the
// *_sse2_12 wrappers, and the six scaled slots repeat those same SSE2
// wrappers. The trailing 12 is the constructor's final argument (presumably
// the bit depth; that parameter is declared outside this view).
1581 const ConvolveFunctions convolve12_sse2(
1582     wrap_convolve_copy_c_12, wrap_convolve_avg_c_12,
1583     wrap_convolve8_horiz_sse2_12, wrap_convolve8_avg_horiz_sse2_12,
1584     wrap_convolve8_vert_sse2_12, wrap_convolve8_avg_vert_sse2_12,
1585     wrap_convolve8_sse2_12, wrap_convolve8_avg_sse2_12,
1586     wrap_convolve8_horiz_sse2_12, wrap_convolve8_avg_horiz_sse2_12,
1587     wrap_convolve8_vert_sse2_12, wrap_convolve8_avg_vert_sse2_12,
1588     wrap_convolve8_sse2_12, wrap_convolve8_avg_sse2_12, 12);
// Instantiates ConvolveTest under the SSE2 prefix with 39 tuples: the same 13
// size pairs (4x4 up to 64x64, presumably block width and height) repeated
// for each of the convolve8_sse2, convolve10_sse2 and convolve12_sse2 tables.
1589 INSTANTIATE_TEST_CASE_P(SSE2, ConvolveTest, ::testing::Values(
1590     make_tuple(4, 4, &convolve8_sse2),
1591     make_tuple(8, 4, &convolve8_sse2),
1592     make_tuple(4, 8, &convolve8_sse2),
1593     make_tuple(8, 8, &convolve8_sse2),
1594     make_tuple(16, 8, &convolve8_sse2),
1595     make_tuple(8, 16, &convolve8_sse2),
1596     make_tuple(16, 16, &convolve8_sse2),
1597     make_tuple(32, 16, &convolve8_sse2),
1598     make_tuple(16, 32, &convolve8_sse2),
1599     make_tuple(32, 32, &convolve8_sse2),
1600     make_tuple(64, 32, &convolve8_sse2),
1601     make_tuple(32, 64, &convolve8_sse2),
1602     make_tuple(64, 64, &convolve8_sse2),
1603     make_tuple(4, 4, &convolve10_sse2),
1604     make_tuple(8, 4, &convolve10_sse2),
1605     make_tuple(4, 8, &convolve10_sse2),
1606     make_tuple(8, 8, &convolve10_sse2),
1607     make_tuple(16, 8, &convolve10_sse2),
1608     make_tuple(8, 16, &convolve10_sse2),
1609     make_tuple(16, 16, &convolve10_sse2),
1610     make_tuple(32, 16, &convolve10_sse2),
1611     make_tuple(16, 32, &convolve10_sse2),
1612     make_tuple(32, 32, &convolve10_sse2),
1613     make_tuple(64, 32, &convolve10_sse2),
1614     make_tuple(32, 64, &convolve10_sse2),
1615     make_tuple(64, 64, &convolve10_sse2),
1616     make_tuple(4, 4, &convolve12_sse2),
1617     make_tuple(8, 4, &convolve12_sse2),
1618     make_tuple(4, 8, &convolve12_sse2),
1619     make_tuple(8, 8, &convolve12_sse2),
1620     make_tuple(16, 8, &convolve12_sse2),
1621     make_tuple(8, 16, &convolve12_sse2),
1622     make_tuple(16, 16, &convolve12_sse2),
1623     make_tuple(32, 16, &convolve12_sse2),
1624     make_tuple(16, 32, &convolve12_sse2),
1625     make_tuple(32, 32, &convolve12_sse2),
1626     make_tuple(64, 32, &convolve12_sse2),
1627     make_tuple(32, 64, &convolve12_sse2),
1628     make_tuple(64, 64, &convolve12_sse2)));
1629 #else
// SSE2 function table for the build without CONFIG_VP9_HIGHBITDEPTH. The
// copy/avg slots take the SSE2 versions only when CONFIG_USE_X86INC is set
// and the C versions otherwise. The 8-tap slots always use the SSE2
// functions, and the six scaled slots use the C vpx_scaled_* routines. The
// final constructor argument is 0.
1630 const ConvolveFunctions convolve8_sse2(
1631 #if CONFIG_USE_X86INC
1632     vpx_convolve_copy_sse2, vpx_convolve_avg_sse2,
1633 #else
1634     vpx_convolve_copy_c, vpx_convolve_avg_c,
1635 #endif  // CONFIG_USE_X86INC
1636     vpx_convolve8_horiz_sse2, vpx_convolve8_avg_horiz_sse2,
1637     vpx_convolve8_vert_sse2, vpx_convolve8_avg_vert_sse2,
1638     vpx_convolve8_sse2, vpx_convolve8_avg_sse2,
1639     vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c,
1640     vpx_scaled_vert_c, vpx_scaled_avg_vert_c,
1641     vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
1642
// Instantiates ConvolveTest under the SSE2 prefix, once per tuple below: 13
// size pairs from 4x4 up to 64x64 (presumably block width and height), each
// paired with a pointer to the convolve8_sse2 function table.
1643 INSTANTIATE_TEST_CASE_P(SSE2, ConvolveTest, ::testing::Values(
1644     make_tuple(4, 4, &convolve8_sse2),
1645     make_tuple(8, 4, &convolve8_sse2),
1646     make_tuple(4, 8, &convolve8_sse2),
1647     make_tuple(8, 8, &convolve8_sse2),
1648     make_tuple(16, 8, &convolve8_sse2),
1649     make_tuple(8, 16, &convolve8_sse2),
1650     make_tuple(16, 16, &convolve8_sse2),
1651     make_tuple(32, 16, &convolve8_sse2),
1652     make_tuple(16, 32, &convolve8_sse2),
1653     make_tuple(32, 32, &convolve8_sse2),
1654     make_tuple(64, 32, &convolve8_sse2),
1655     make_tuple(32, 64, &convolve8_sse2),
1656     make_tuple(64, 64, &convolve8_sse2)));
1657 #endif  // CONFIG_VP9_HIGHBITDEPTH
1658 #endif
1659
1660 #if HAVE_SSSE3
// SSSE3 function table. Only the 8-tap slots (h8/v8/hv8 and their avg
// variants) use SSSE3 functions; the copy/avg slots and the six scaled slots
// use the C routines. The final constructor argument is 0.
1661 const ConvolveFunctions convolve8_ssse3(
1662     vpx_convolve_copy_c, vpx_convolve_avg_c,
1663     vpx_convolve8_horiz_ssse3, vpx_convolve8_avg_horiz_ssse3,
1664     vpx_convolve8_vert_ssse3, vpx_convolve8_avg_vert_ssse3,
1665     vpx_convolve8_ssse3, vpx_convolve8_avg_ssse3,
1666     vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c,
1667     vpx_scaled_vert_c, vpx_scaled_avg_vert_c,
1668     vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
1669
// Instantiates ConvolveTest under the SSSE3 prefix, once per tuple below: 13
// size pairs from 4x4 up to 64x64 (presumably block width and height), each
// paired with a pointer to the convolve8_ssse3 function table.
1670 INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest, ::testing::Values(
1671     make_tuple(4, 4, &convolve8_ssse3),
1672     make_tuple(8, 4, &convolve8_ssse3),
1673     make_tuple(4, 8, &convolve8_ssse3),
1674     make_tuple(8, 8, &convolve8_ssse3),
1675     make_tuple(16, 8, &convolve8_ssse3),
1676     make_tuple(8, 16, &convolve8_ssse3),
1677     make_tuple(16, 16, &convolve8_ssse3),
1678     make_tuple(32, 16, &convolve8_ssse3),
1679     make_tuple(16, 32, &convolve8_ssse3),
1680     make_tuple(32, 32, &convolve8_ssse3),
1681     make_tuple(64, 32, &convolve8_ssse3),
1682     make_tuple(32, 64, &convolve8_ssse3),
1683     make_tuple(64, 64, &convolve8_ssse3)));
1684 #endif
1685
1686 #if HAVE_AVX2 && HAVE_SSSE3
// AVX2 function table. The non-averaging 8-tap slots (h8, v8, hv8) use the
// AVX2 functions, while their averaging counterparts use the SSSE3 functions
// (presumably why the enclosing guard also requires HAVE_SSSE3). The copy/avg
// slots and the six scaled slots use the C routines. The final constructor
// argument is 0.
1687 const ConvolveFunctions convolve8_avx2(
1688     vpx_convolve_copy_c, vpx_convolve_avg_c,
1689     vpx_convolve8_horiz_avx2, vpx_convolve8_avg_horiz_ssse3,
1690     vpx_convolve8_vert_avx2, vpx_convolve8_avg_vert_ssse3,
1691     vpx_convolve8_avx2, vpx_convolve8_avg_ssse3,
1692     vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c,
1693     vpx_scaled_vert_c, vpx_scaled_avg_vert_c,
1694     vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
1695
// Instantiates ConvolveTest under the AVX2 prefix, once per tuple below: 13
// size pairs from 4x4 up to 64x64 (presumably block width and height), each
// paired with a pointer to the convolve8_avx2 function table.
// NOTE(review): this list puts (8, 16) before (16, 8), the reverse of the
// other instantiations in this file; the set of sizes is the same.
1696 INSTANTIATE_TEST_CASE_P(AVX2, ConvolveTest, ::testing::Values(
1697     make_tuple(4, 4, &convolve8_avx2),
1698     make_tuple(8, 4, &convolve8_avx2),
1699     make_tuple(4, 8, &convolve8_avx2),
1700     make_tuple(8, 8, &convolve8_avx2),
1701     make_tuple(8, 16, &convolve8_avx2),
1702     make_tuple(16, 8, &convolve8_avx2),
1703     make_tuple(16, 16, &convolve8_avx2),
1704     make_tuple(32, 16, &convolve8_avx2),
1705     make_tuple(16, 32, &convolve8_avx2),
1706     make_tuple(32, 32, &convolve8_avx2),
1707     make_tuple(64, 32, &convolve8_avx2),
1708     make_tuple(32, 64, &convolve8_avx2),
1709     make_tuple(64, 64, &convolve8_avx2)));
1710 #endif  // HAVE_AVX2 && HAVE_SSSE3
1711
1712 #if HAVE_NEON
1713 #if HAVE_NEON_ASM
1714 const ConvolveFunctions convolve8_neon(
1715     vpx_convolve_copy_neon, vpx_convolve_avg_neon,
1716     vpx_convolve8_horiz_neon, vpx_convolve8_avg_horiz_neon,
1717     vpx_convolve8_vert_neon, vpx_convolve8_avg_vert_neon,
1718     vpx_convolve8_neon, vpx_convolve8_avg_neon,
1719     vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c,
1720     vpx_scaled_vert_c, vpx_scaled_avg_vert_c,
1721     vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
1722 #else  // HAVE_NEON
1723 const ConvolveFunctions convolve8_neon(
1724     vpx_convolve_copy_neon, vpx_convolve_avg_neon,
1725     vpx_convolve8_horiz_neon, vpx_convolve8_avg_horiz_neon,
1726     vpx_convolve8_vert_neon, vpx_convolve8_avg_vert_neon,
1727     vpx_convolve8_neon, vpx_convolve8_avg_neon,
1728     vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c,
1729     vpx_scaled_vert_c, vpx_scaled_avg_vert_c,
1730     vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
1731 #endif  // HAVE_NEON_ASM
1732
// Instantiates ConvolveTest under the NEON prefix, once per tuple below: 13
// size pairs from 4x4 up to 64x64 (presumably block width and height), each
// paired with a pointer to the convolve8_neon function table.
1733 INSTANTIATE_TEST_CASE_P(NEON, ConvolveTest, ::testing::Values(
1734     make_tuple(4, 4, &convolve8_neon),
1735     make_tuple(8, 4, &convolve8_neon),
1736     make_tuple(4, 8, &convolve8_neon),
1737     make_tuple(8, 8, &convolve8_neon),
1738     make_tuple(16, 8, &convolve8_neon),
1739     make_tuple(8, 16, &convolve8_neon),
1740     make_tuple(16, 16, &convolve8_neon),
1741     make_tuple(32, 16, &convolve8_neon),
1742     make_tuple(16, 32, &convolve8_neon),
1743     make_tuple(32, 32, &convolve8_neon),
1744     make_tuple(64, 32, &convolve8_neon),
1745     make_tuple(32, 64, &convolve8_neon),
1746     make_tuple(64, 64, &convolve8_neon)));
1747 #endif  // HAVE_NEON
1748
1749 #if HAVE_DSPR2
// DSPR2 function table. The copy/avg and 8-tap slots use the DSPR2
// functions; the six scaled slots use the C vpx_scaled_* routines. The final
// constructor argument is 0.
1750 const ConvolveFunctions convolve8_dspr2(
1751     vpx_convolve_copy_dspr2, vpx_convolve_avg_dspr2,
1752     vpx_convolve8_horiz_dspr2, vpx_convolve8_avg_horiz_dspr2,
1753     vpx_convolve8_vert_dspr2, vpx_convolve8_avg_vert_dspr2,
1754     vpx_convolve8_dspr2, vpx_convolve8_avg_dspr2,
1755     vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c,
1756     vpx_scaled_vert_c, vpx_scaled_avg_vert_c,
1757     vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
1758
// Instantiates ConvolveTest under the DSPR2 prefix, once per tuple below: 13
// size pairs from 4x4 up to 64x64 (presumably block width and height), each
// paired with a pointer to the convolve8_dspr2 function table.
1759 INSTANTIATE_TEST_CASE_P(DSPR2, ConvolveTest, ::testing::Values(
1760     make_tuple(4, 4, &convolve8_dspr2),
1761     make_tuple(8, 4, &convolve8_dspr2),
1762     make_tuple(4, 8, &convolve8_dspr2),
1763     make_tuple(8, 8, &convolve8_dspr2),
1764     make_tuple(16, 8, &convolve8_dspr2),
1765     make_tuple(8, 16, &convolve8_dspr2),
1766     make_tuple(16, 16, &convolve8_dspr2),
1767     make_tuple(32, 16, &convolve8_dspr2),
1768     make_tuple(16, 32, &convolve8_dspr2),
1769     make_tuple(32, 32, &convolve8_dspr2),
1770     make_tuple(64, 32, &convolve8_dspr2),
1771     make_tuple(32, 64, &convolve8_dspr2),
1772     make_tuple(64, 64, &convolve8_dspr2)));
1773 #endif
1774
1775 #if HAVE_MSA
// MSA function table. The copy/avg and 8-tap slots use the MSA functions;
// the six scaled slots use the C vpx_scaled_* routines. The final
// constructor argument is 0.
1776 const ConvolveFunctions convolve8_msa(
1777     vpx_convolve_copy_msa, vpx_convolve_avg_msa,
1778     vpx_convolve8_horiz_msa, vpx_convolve8_avg_horiz_msa,
1779     vpx_convolve8_vert_msa, vpx_convolve8_avg_vert_msa,
1780     vpx_convolve8_msa, vpx_convolve8_avg_msa,
1781     vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c,
1782     vpx_scaled_vert_c, vpx_scaled_avg_vert_c,
1783     vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
1784
// Instantiates ConvolveTest under the MSA prefix, once per tuple below: 13
// size pairs from 4x4 up to 64x64 (presumably block width and height), each
// paired with a pointer to the convolve8_msa function table.
1785 INSTANTIATE_TEST_CASE_P(MSA, ConvolveTest, ::testing::Values(
1786     make_tuple(4, 4, &convolve8_msa),
1787     make_tuple(8, 4, &convolve8_msa),
1788     make_tuple(4, 8, &convolve8_msa),
1789     make_tuple(8, 8, &convolve8_msa),
1790     make_tuple(16, 8, &convolve8_msa),
1791     make_tuple(8, 16, &convolve8_msa),
1792     make_tuple(16, 16, &convolve8_msa),
1793     make_tuple(32, 16, &convolve8_msa),
1794     make_tuple(16, 32, &convolve8_msa),
1795     make_tuple(32, 32, &convolve8_msa),
1796     make_tuple(64, 32, &convolve8_msa),
1797     make_tuple(32, 64, &convolve8_msa),
1798     make_tuple(64, 64, &convolve8_msa)));
1799 #endif  // HAVE_MSA
1800 }  // namespace