granicus.if.org Git - libvpx/blob - test/convolve_test.cc

   1 /*
   2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
   3  *
   4  *  Use of this source code is governed by a BSD-style license
   5  *  that can be found in the LICENSE file in the root of the source
   6  *  tree. An additional intellectual property rights grant can be found
   7  *  in the file PATENTS.  All contributing project authors may
   8  *  be found in the AUTHORS file in the root of the source tree.
   9  */
  10
  11 #include <string.h>
  12
  13 #include "third_party/googletest/src/include/gtest/gtest.h"
  14
  15 #include "./vpx_config.h"
  16 #include "./vp9_rtcd.h"
  17 #include "./vpx_dsp_rtcd.h"
  18 #include "test/acm_random.h"
  19 #include "test/clear_system_state.h"
  20 #include "test/register_state_check.h"
  21 #include "test/util.h"
  22 #include "vp9/common/vp9_common.h"
  23 #include "vp9/common/vp9_filter.h"
  24 #include "vpx_dsp/vpx_dsp_common.h"
  25 #include "vpx_dsp/vpx_filter.h"
  26 #include "vpx_mem/vpx_mem.h"
  27 #include "vpx_ports/mem.h"
  28
  29 namespace {
  30
// Largest block width/height handled by the reference filters in this file;
// it sizes their scratch buffers and is the bound checked by their asserts.
static const unsigned int kMaxDimension = 64;

// Common signature of every convolve routine stored in ConvolveFunctions.
// src/dst are the input and output blocks with their row strides, filter_x /
// filter_y point at the horizontal / vertical tap arrays, and w / h are the
// block dimensions. The tests in this file pass 16 for the two filter stride
// arguments when filtering and 0 for the plain copy/avg calls.
typedef void (*ConvolveFunc)(const uint8_t *src, ptrdiff_t src_stride,
                             uint8_t *dst, ptrdiff_t dst_stride,
                             const int16_t *filter_x, int filter_x_stride,
                             const int16_t *filter_y, int filter_y_stride,
                             int w, int h);
  38
  39 struct ConvolveFunctions {
  40   ConvolveFunctions(ConvolveFunc copy, ConvolveFunc avg,
  41                     ConvolveFunc h8, ConvolveFunc h8_avg,
  42                     ConvolveFunc v8, ConvolveFunc v8_avg,
  43                     ConvolveFunc hv8, ConvolveFunc hv8_avg,
  44                     ConvolveFunc sh8, ConvolveFunc sh8_avg,
  45                     ConvolveFunc sv8, ConvolveFunc sv8_avg,
  46                     ConvolveFunc shv8, ConvolveFunc shv8_avg,
  47                     int bd)
  48       : copy_(copy), avg_(avg), h8_(h8), v8_(v8), hv8_(hv8), h8_avg_(h8_avg),
  49         v8_avg_(v8_avg), hv8_avg_(hv8_avg), sh8_(sh8), sv8_(sv8), shv8_(shv8),
  50         sh8_avg_(sh8_avg), sv8_avg_(sv8_avg), shv8_avg_(shv8_avg),
  51         use_highbd_(bd) {}
  52
  53   ConvolveFunc copy_;
  54   ConvolveFunc avg_;
  55   ConvolveFunc h8_;
  56   ConvolveFunc v8_;
  57   ConvolveFunc hv8_;
  58   ConvolveFunc h8_avg_;
  59   ConvolveFunc v8_avg_;
  60   ConvolveFunc hv8_avg_;
  61   ConvolveFunc sh8_;        // scaled horiz
  62   ConvolveFunc sv8_;        // scaled vert
  63   ConvolveFunc shv8_;       // scaled horiz/vert
  64   ConvolveFunc sh8_avg_;    // scaled avg horiz
  65   ConvolveFunc sv8_avg_;    // scaled avg vert
  66   ConvolveFunc shv8_avg_;   // scaled avg horiz/vert
  67   int use_highbd_;  // 0 if high bitdepth not used, else the actual bit depth.
  68 };
  69
// Test parameter tuple: (block width, block height, functions under test).
typedef std::tr1::tuple<int, int, const ConvolveFunctions *> ConvolveParam;
  71
// Reference 8-tap subpixel filter, slightly modified to fit into this test.
// The reference filters add half of VP9_FILTER_WEIGHT as a rounding term and
// then shift the sum right by VP9_FILTER_SHIFT (128 == 1 << 7).
#define VP9_FILTER_WEIGHT 128
#define VP9_FILTER_SHIFT 7
  75 uint8_t clip_pixel(int x) {
  76   return x < 0 ? 0 :
  77          x > 255 ? 255 :
  78          x;
  79 }
  80
  81 void filter_block2d_8_c(const uint8_t *src_ptr,
  82                         const unsigned int src_stride,
  83                         const int16_t *HFilter,
  84                         const int16_t *VFilter,
  85                         uint8_t *dst_ptr,
  86                         unsigned int dst_stride,
  87                         unsigned int output_width,
  88                         unsigned int output_height) {
  89   // Between passes, we use an intermediate buffer whose height is extended to
  90   // have enough horizontally filtered values as input for the vertical pass.
  91   // This buffer is allocated to be big enough for the largest block type we
  92   // support.
  93   const int kInterp_Extend = 4;
  94   const unsigned int intermediate_height =
  95       (kInterp_Extend - 1) + output_height + kInterp_Extend;
  96   unsigned int i, j;
  97
  98   // Size of intermediate_buffer is max_intermediate_height * filter_max_width,
  99   // where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height
 100   //                                 + kInterp_Extend
 101   //                               = 3 + 16 + 4
 102   //                               = 23
 103   // and filter_max_width          = 16
 104   //
 105   uint8_t intermediate_buffer[71 * kMaxDimension];
 106   const int intermediate_next_stride = 1 - intermediate_height * output_width;
 107
 108   // Horizontal pass (src -> transposed intermediate).
 109   uint8_t *output_ptr = intermediate_buffer;
 110   const int src_next_row_stride = src_stride - output_width;
 111   src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1);
 112   for (i = 0; i < intermediate_height; ++i) {
 113     for (j = 0; j < output_width; ++j) {
 114       // Apply filter...
 115       const int temp = (src_ptr[0] * HFilter[0]) +
 116           (src_ptr[1] * HFilter[1]) +
 117           (src_ptr[2] * HFilter[2]) +
 118           (src_ptr[3] * HFilter[3]) +
 119           (src_ptr[4] * HFilter[4]) +
 120           (src_ptr[5] * HFilter[5]) +
 121           (src_ptr[6] * HFilter[6]) +
 122           (src_ptr[7] * HFilter[7]) +
 123           (VP9_FILTER_WEIGHT >> 1);  // Rounding
 124
 125       // Normalize back to 0-255...
 126       *output_ptr = clip_pixel(temp >> VP9_FILTER_SHIFT);
 127       ++src_ptr;
 128       output_ptr += intermediate_height;
 129     }
 130     src_ptr += src_next_row_stride;
 131     output_ptr += intermediate_next_stride;
 132   }
 133
 134   // Vertical pass (transposed intermediate -> dst).
 135   src_ptr = intermediate_buffer;
 136   const int dst_next_row_stride = dst_stride - output_width;
 137   for (i = 0; i < output_height; ++i) {
 138     for (j = 0; j < output_width; ++j) {
 139       // Apply filter...
 140       const int temp = (src_ptr[0] * VFilter[0]) +
 141           (src_ptr[1] * VFilter[1]) +
 142           (src_ptr[2] * VFilter[2]) +
 143           (src_ptr[3] * VFilter[3]) +
 144           (src_ptr[4] * VFilter[4]) +
 145           (src_ptr[5] * VFilter[5]) +
 146           (src_ptr[6] * VFilter[6]) +
 147           (src_ptr[7] * VFilter[7]) +
 148           (VP9_FILTER_WEIGHT >> 1);  // Rounding
 149
 150       // Normalize back to 0-255...
 151       *dst_ptr++ = clip_pixel(temp >> VP9_FILTER_SHIFT);
 152       src_ptr += intermediate_height;
 153     }
 154     src_ptr += intermediate_next_stride;
 155     dst_ptr += dst_next_row_stride;
 156   }
 157 }
 158
 159 void block2d_average_c(uint8_t *src,
 160                        unsigned int src_stride,
 161                        uint8_t *output_ptr,
 162                        unsigned int output_stride,
 163                        unsigned int output_width,
 164                        unsigned int output_height) {
 165   unsigned int i, j;
 166   for (i = 0; i < output_height; ++i) {
 167     for (j = 0; j < output_width; ++j) {
 168       output_ptr[j] = (output_ptr[j] + src[i * src_stride + j] + 1) >> 1;
 169     }
 170     output_ptr += output_stride;
 171   }
 172 }
 173
 174 void filter_average_block2d_8_c(const uint8_t *src_ptr,
 175                                 const unsigned int src_stride,
 176                                 const int16_t *HFilter,
 177                                 const int16_t *VFilter,
 178                                 uint8_t *dst_ptr,
 179                                 unsigned int dst_stride,
 180                                 unsigned int output_width,
 181                                 unsigned int output_height) {
 182   uint8_t tmp[kMaxDimension * kMaxDimension];
 183
 184   assert(output_width <= kMaxDimension);
 185   assert(output_height <= kMaxDimension);
 186   filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp, 64,
 187                      output_width, output_height);
 188   block2d_average_c(tmp, 64, dst_ptr, dst_stride,
 189                     output_width, output_height);
 190 }
 191
 192 #if CONFIG_VP9_HIGHBITDEPTH
 193 void highbd_filter_block2d_8_c(const uint16_t *src_ptr,
 194                                const unsigned int src_stride,
 195                                const int16_t *HFilter,
 196                                const int16_t *VFilter,
 197                                uint16_t *dst_ptr,
 198                                unsigned int dst_stride,
 199                                unsigned int output_width,
 200                                unsigned int output_height,
 201                                int bd) {
 202   // Between passes, we use an intermediate buffer whose height is extended to
 203   // have enough horizontally filtered values as input for the vertical pass.
 204   // This buffer is allocated to be big enough for the largest block type we
 205   // support.
 206   const int kInterp_Extend = 4;
 207   const unsigned int intermediate_height =
 208       (kInterp_Extend - 1) + output_height + kInterp_Extend;
 209
 210   /* Size of intermediate_buffer is max_intermediate_height * filter_max_width,
 211    * where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height
 212    *                                 + kInterp_Extend
 213    *                               = 3 + 16 + 4
 214    *                               = 23
 215    * and filter_max_width = 16
 216    */
 217   uint16_t intermediate_buffer[71 * kMaxDimension];
 218   const int intermediate_next_stride = 1 - intermediate_height * output_width;
 219
 220   // Horizontal pass (src -> transposed intermediate).
 221   {
 222     uint16_t *output_ptr = intermediate_buffer;
 223     const int src_next_row_stride = src_stride - output_width;
 224     unsigned int i, j;
 225     src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1);
 226     for (i = 0; i < intermediate_height; ++i) {
 227       for (j = 0; j < output_width; ++j) {
 228         // Apply filter...
 229         const int temp = (src_ptr[0] * HFilter[0]) +
 230                          (src_ptr[1] * HFilter[1]) +
 231                          (src_ptr[2] * HFilter[2]) +
 232                          (src_ptr[3] * HFilter[3]) +
 233                          (src_ptr[4] * HFilter[4]) +
 234                          (src_ptr[5] * HFilter[5]) +
 235                          (src_ptr[6] * HFilter[6]) +
 236                          (src_ptr[7] * HFilter[7]) +
 237                          (VP9_FILTER_WEIGHT >> 1);  // Rounding
 238
 239         // Normalize back to 0-255...
 240         *output_ptr = clip_pixel_highbd(temp >> VP9_FILTER_SHIFT, bd);
 241         ++src_ptr;
 242         output_ptr += intermediate_height;
 243       }
 244       src_ptr += src_next_row_stride;
 245       output_ptr += intermediate_next_stride;
 246     }
 247   }
 248
 249   // Vertical pass (transposed intermediate -> dst).
 250   {
 251     uint16_t *src_ptr = intermediate_buffer;
 252     const int dst_next_row_stride = dst_stride - output_width;
 253     unsigned int i, j;
 254     for (i = 0; i < output_height; ++i) {
 255       for (j = 0; j < output_width; ++j) {
 256         // Apply filter...
 257         const int temp = (src_ptr[0] * VFilter[0]) +
 258                          (src_ptr[1] * VFilter[1]) +
 259                          (src_ptr[2] * VFilter[2]) +
 260                          (src_ptr[3] * VFilter[3]) +
 261                          (src_ptr[4] * VFilter[4]) +
 262                          (src_ptr[5] * VFilter[5]) +
 263                          (src_ptr[6] * VFilter[6]) +
 264                          (src_ptr[7] * VFilter[7]) +
 265                          (VP9_FILTER_WEIGHT >> 1);  // Rounding
 266
 267         // Normalize back to 0-255...
 268         *dst_ptr++ = clip_pixel_highbd(temp >> VP9_FILTER_SHIFT, bd);
 269         src_ptr += intermediate_height;
 270       }
 271       src_ptr += intermediate_next_stride;
 272       dst_ptr += dst_next_row_stride;
 273     }
 274   }
 275 }
 276
 277 void highbd_block2d_average_c(uint16_t *src,
 278                               unsigned int src_stride,
 279                               uint16_t *output_ptr,
 280                               unsigned int output_stride,
 281                               unsigned int output_width,
 282                               unsigned int output_height,
 283                               int bd) {
 284   unsigned int i, j;
 285   for (i = 0; i < output_height; ++i) {
 286     for (j = 0; j < output_width; ++j) {
 287       output_ptr[j] = (output_ptr[j] + src[i * src_stride + j] + 1) >> 1;
 288     }
 289     output_ptr += output_stride;
 290   }
 291 }
 292
 293 void highbd_filter_average_block2d_8_c(const uint16_t *src_ptr,
 294                                        const unsigned int src_stride,
 295                                        const int16_t *HFilter,
 296                                        const int16_t *VFilter,
 297                                        uint16_t *dst_ptr,
 298                                        unsigned int dst_stride,
 299                                        unsigned int output_width,
 300                                        unsigned int output_height,
 301                                        int bd) {
 302   uint16_t tmp[kMaxDimension * kMaxDimension];
 303
 304   assert(output_width <= kMaxDimension);
 305   assert(output_height <= kMaxDimension);
 306   highbd_filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp, 64,
 307                             output_width, output_height, bd);
 308   highbd_block2d_average_c(tmp, 64, dst_ptr, dst_stride,
 309                            output_width, output_height, bd);
 310 }
 311 #endif  // CONFIG_VP9_HIGHBITDEPTH
 312
 313 class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
 314  public:
 315   static void SetUpTestCase() {
 316     // Force input_ to be unaligned, output to be 16 byte aligned.
 317     input_ = reinterpret_cast<uint8_t*>(
 318         vpx_memalign(kDataAlignment, kInputBufferSize + 1)) + 1;
 319     output_ = reinterpret_cast<uint8_t*>(
 320         vpx_memalign(kDataAlignment, kOutputBufferSize));
 321     output_ref_ = reinterpret_cast<uint8_t*>(
 322         vpx_memalign(kDataAlignment, kOutputBufferSize));
 323 #if CONFIG_VP9_HIGHBITDEPTH
 324     input16_ = reinterpret_cast<uint16_t*>(
 325         vpx_memalign(kDataAlignment,
 326                      (kInputBufferSize + 1) * sizeof(uint16_t))) + 1;
 327     output16_ = reinterpret_cast<uint16_t*>(
 328         vpx_memalign(kDataAlignment, (kOutputBufferSize) * sizeof(uint16_t)));
 329     output16_ref_ = reinterpret_cast<uint16_t*>(
 330         vpx_memalign(kDataAlignment, (kOutputBufferSize) * sizeof(uint16_t)));
 331 #endif
 332   }
 333
 334   virtual void TearDown() { libvpx_test::ClearSystemState(); }
 335
 336   static void TearDownTestCase() {
 337     vpx_free(input_ - 1);
 338     input_ = NULL;
 339     vpx_free(output_);
 340     output_ = NULL;
 341     vpx_free(output_ref_);
 342     output_ref_ = NULL;
 343 #if CONFIG_VP9_HIGHBITDEPTH
 344     vpx_free(input16_ - 1);
 345     input16_ = NULL;
 346     vpx_free(output16_);
 347     output16_ = NULL;
 348     vpx_free(output16_ref_);
 349     output16_ref_ = NULL;
 350 #endif
 351   }
 352
 353  protected:
 354   static const int kDataAlignment = 16;
 355   static const int kOuterBlockSize = 256;
 356   static const int kInputStride = kOuterBlockSize;
 357   static const int kOutputStride = kOuterBlockSize;
 358   static const int kInputBufferSize = kOuterBlockSize * kOuterBlockSize;
 359   static const int kOutputBufferSize = kOuterBlockSize * kOuterBlockSize;
 360
 361   int Width() const { return GET_PARAM(0); }
 362   int Height() const { return GET_PARAM(1); }
 363   int BorderLeft() const {
 364     const int center = (kOuterBlockSize - Width()) / 2;
 365     return (center + (kDataAlignment - 1)) & ~(kDataAlignment - 1);
 366   }
 367   int BorderTop() const { return (kOuterBlockSize - Height()) / 2; }
 368
 369   bool IsIndexInBorder(int i) {
 370     return (i < BorderTop() * kOuterBlockSize ||
 371             i >= (BorderTop() + Height()) * kOuterBlockSize ||
 372             i % kOuterBlockSize < BorderLeft() ||
 373             i % kOuterBlockSize >= (BorderLeft() + Width()));
 374   }
 375
 376   virtual void SetUp() {
 377     UUT_ = GET_PARAM(2);
 378 #if CONFIG_VP9_HIGHBITDEPTH
 379     if (UUT_->use_highbd_ != 0)
 380       mask_ = (1 << UUT_->use_highbd_) - 1;
 381     else
 382       mask_ = 255;
 383 #endif
 384     /* Set up guard blocks for an inner block centered in the outer block */
 385     for (int i = 0; i < kOutputBufferSize; ++i) {
 386       if (IsIndexInBorder(i))
 387         output_[i] = 255;
 388       else
 389         output_[i] = 0;
 390     }
 391
 392     ::libvpx_test::ACMRandom prng;
 393     for (int i = 0; i < kInputBufferSize; ++i) {
 394       if (i & 1) {
 395         input_[i] = 255;
 396 #if CONFIG_VP9_HIGHBITDEPTH
 397         input16_[i] = mask_;
 398 #endif
 399       } else {
 400         input_[i] = prng.Rand8Extremes();
 401 #if CONFIG_VP9_HIGHBITDEPTH
 402         input16_[i] = prng.Rand16() & mask_;
 403 #endif
 404       }
 405     }
 406   }
 407
 408   void SetConstantInput(int value) {
 409     memset(input_, value, kInputBufferSize);
 410 #if CONFIG_VP9_HIGHBITDEPTH
 411     vpx_memset16(input16_, value, kInputBufferSize);
 412 #endif
 413   }
 414
 415   void CopyOutputToRef() {
 416     memcpy(output_ref_, output_, kOutputBufferSize);
 417 #if CONFIG_VP9_HIGHBITDEPTH
 418     memcpy(output16_ref_, output16_, kOutputBufferSize);
 419 #endif
 420   }
 421
 422   void CheckGuardBlocks() {
 423     for (int i = 0; i < kOutputBufferSize; ++i) {
 424       if (IsIndexInBorder(i))
 425         EXPECT_EQ(255, output_[i]);
 426     }
 427   }
 428
 429   uint8_t *input() const {
 430 #if CONFIG_VP9_HIGHBITDEPTH
 431     if (UUT_->use_highbd_ == 0) {
 432       return input_ + BorderTop() * kOuterBlockSize + BorderLeft();
 433     } else {
 434       return CONVERT_TO_BYTEPTR(input16_ + BorderTop() * kOuterBlockSize +
 435                                 BorderLeft());
 436     }
 437 #else
 438     return input_ + BorderTop() * kOuterBlockSize + BorderLeft();
 439 #endif
 440   }
 441
 442   uint8_t *output() const {
 443 #if CONFIG_VP9_HIGHBITDEPTH
 444     if (UUT_->use_highbd_ == 0) {
 445       return output_ + BorderTop() * kOuterBlockSize + BorderLeft();
 446     } else {
 447       return CONVERT_TO_BYTEPTR(output16_ + BorderTop() * kOuterBlockSize +
 448                                 BorderLeft());
 449     }
 450 #else
 451     return output_ + BorderTop() * kOuterBlockSize + BorderLeft();
 452 #endif
 453   }
 454
 455   uint8_t *output_ref() const {
 456 #if CONFIG_VP9_HIGHBITDEPTH
 457     if (UUT_->use_highbd_ == 0) {
 458       return output_ref_ + BorderTop() * kOuterBlockSize + BorderLeft();
 459     } else {
 460       return CONVERT_TO_BYTEPTR(output16_ref_ + BorderTop() * kOuterBlockSize +
 461                                 BorderLeft());
 462     }
 463 #else
 464     return output_ref_ + BorderTop() * kOuterBlockSize + BorderLeft();
 465 #endif
 466   }
 467
 468   uint16_t lookup(uint8_t *list, int index) const {
 469 #if CONFIG_VP9_HIGHBITDEPTH
 470     if (UUT_->use_highbd_ == 0) {
 471       return list[index];
 472     } else {
 473       return CONVERT_TO_SHORTPTR(list)[index];
 474     }
 475 #else
 476     return list[index];
 477 #endif
 478   }
 479
 480   void assign_val(uint8_t *list, int index, uint16_t val) const {
 481 #if CONFIG_VP9_HIGHBITDEPTH
 482     if (UUT_->use_highbd_ == 0) {
 483       list[index] = (uint8_t) val;
 484     } else {
 485       CONVERT_TO_SHORTPTR(list)[index] = val;
 486     }
 487 #else
 488     list[index] = (uint8_t) val;
 489 #endif
 490   }
 491
 492   void wrapper_filter_average_block2d_8_c(const uint8_t *src_ptr,
 493                                           const unsigned int src_stride,
 494                                           const int16_t *HFilter,
 495                                           const int16_t *VFilter,
 496                                           uint8_t *dst_ptr,
 497                                           unsigned int dst_stride,
 498                                           unsigned int output_width,
 499                                           unsigned int output_height) {
 500 #if CONFIG_VP9_HIGHBITDEPTH
 501     if (UUT_->use_highbd_ == 0) {
 502       filter_average_block2d_8_c(src_ptr, src_stride, HFilter, VFilter,
 503                                  dst_ptr, dst_stride, output_width,
 504                                  output_height);
 505     } else {
 506       highbd_filter_average_block2d_8_c(CONVERT_TO_SHORTPTR(src_ptr),
 507                                         src_stride, HFilter, VFilter,
 508                                         CONVERT_TO_SHORTPTR(dst_ptr),
 509                                         dst_stride, output_width, output_height,
 510                                         UUT_->use_highbd_);
 511     }
 512 #else
 513     filter_average_block2d_8_c(src_ptr, src_stride, HFilter, VFilter,
 514                                dst_ptr, dst_stride, output_width,
 515                                output_height);
 516 #endif
 517   }
 518
 519   void wrapper_filter_block2d_8_c(const uint8_t *src_ptr,
 520                                   const unsigned int src_stride,
 521                                   const int16_t *HFilter,
 522                                   const int16_t *VFilter,
 523                                   uint8_t *dst_ptr,
 524                                   unsigned int dst_stride,
 525                                   unsigned int output_width,
 526                                   unsigned int output_height) {
 527 #if CONFIG_VP9_HIGHBITDEPTH
 528     if (UUT_->use_highbd_ == 0) {
 529       filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter,
 530                          dst_ptr, dst_stride, output_width, output_height);
 531     } else {
 532       highbd_filter_block2d_8_c(CONVERT_TO_SHORTPTR(src_ptr), src_stride,
 533                                 HFilter, VFilter,
 534                                 CONVERT_TO_SHORTPTR(dst_ptr), dst_stride,
 535                                 output_width, output_height, UUT_->use_highbd_);
 536     }
 537 #else
 538     filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter,
 539                        dst_ptr, dst_stride, output_width, output_height);
 540 #endif
 541   }
 542
 543   const ConvolveFunctions* UUT_;
 544   static uint8_t* input_;
 545   static uint8_t* output_;
 546   static uint8_t* output_ref_;
 547 #if CONFIG_VP9_HIGHBITDEPTH
 548   static uint16_t* input16_;
 549   static uint16_t* output16_;
 550   static uint16_t* output16_ref_;
 551   int mask_;
 552 #endif
 553 };
 554
// Out-of-class definitions of the fixture's static buffer pointers. They are
// assigned in ConvolveTest::SetUpTestCase() and reset to NULL in
// ConvolveTest::TearDownTestCase().
uint8_t* ConvolveTest::input_ = NULL;
uint8_t* ConvolveTest::output_ = NULL;
uint8_t* ConvolveTest::output_ref_ = NULL;
#if CONFIG_VP9_HIGHBITDEPTH
uint16_t* ConvolveTest::input16_ = NULL;
uint16_t* ConvolveTest::output16_ = NULL;
uint16_t* ConvolveTest::output16_ref_ = NULL;
#endif
 563
// Sanity check of the fixture itself: the guard border must be intact right
// after SetUp(), before any function under test has run.
TEST_P(ConvolveTest, GuardBlocks) {
  CheckGuardBlocks();
}
 567
 568 TEST_P(ConvolveTest, Copy) {
 569   uint8_t* const in = input();
 570   uint8_t* const out = output();
 571
 572   ASM_REGISTER_STATE_CHECK(
 573       UUT_->copy_(in, kInputStride, out, kOutputStride, NULL, 0, NULL, 0,
 574                   Width(), Height()));
 575
 576   CheckGuardBlocks();
 577
 578   for (int y = 0; y < Height(); ++y)
 579     for (int x = 0; x < Width(); ++x)
 580       ASSERT_EQ(lookup(out, y * kOutputStride + x),
 581                 lookup(in, y * kInputStride + x))
 582           << "(" << x << "," << y << ")";
 583 }
 584
 585 TEST_P(ConvolveTest, Avg) {
 586   uint8_t* const in = input();
 587   uint8_t* const out = output();
 588   uint8_t* const out_ref = output_ref();
 589   CopyOutputToRef();
 590
 591   ASM_REGISTER_STATE_CHECK(
 592       UUT_->avg_(in, kInputStride, out, kOutputStride, NULL, 0, NULL, 0,
 593                 Width(), Height()));
 594
 595   CheckGuardBlocks();
 596
 597   for (int y = 0; y < Height(); ++y)
 598     for (int x = 0; x < Width(); ++x)
 599       ASSERT_EQ(lookup(out, y * kOutputStride + x),
 600                 ROUND_POWER_OF_TWO(lookup(in, y * kInputStride + x) +
 601                                    lookup(out_ref, y * kOutputStride + x), 1))
 602           << "(" << x << "," << y << ")";
 603 }
 604
 605 TEST_P(ConvolveTest, CopyHoriz) {
 606   uint8_t* const in = input();
 607   uint8_t* const out = output();
 608   DECLARE_ALIGNED(256, const int16_t, filter8[8]) = {0, 0, 0, 128, 0, 0, 0, 0};
 609
 610   ASM_REGISTER_STATE_CHECK(
 611       UUT_->sh8_(in, kInputStride, out, kOutputStride, filter8, 16, filter8, 16,
 612                  Width(), Height()));
 613
 614   CheckGuardBlocks();
 615
 616   for (int y = 0; y < Height(); ++y)
 617     for (int x = 0; x < Width(); ++x)
 618       ASSERT_EQ(lookup(out, y * kOutputStride + x),
 619                 lookup(in, y * kInputStride + x))
 620           << "(" << x << "," << y << ")";
 621 }
 622
 623 TEST_P(ConvolveTest, CopyVert) {
 624   uint8_t* const in = input();
 625   uint8_t* const out = output();
 626   DECLARE_ALIGNED(256, const int16_t, filter8[8]) = {0, 0, 0, 128, 0, 0, 0, 0};
 627
 628   ASM_REGISTER_STATE_CHECK(
 629       UUT_->sv8_(in, kInputStride, out, kOutputStride, filter8, 16, filter8, 16,
 630                  Width(), Height()));
 631
 632   CheckGuardBlocks();
 633
 634   for (int y = 0; y < Height(); ++y)
 635     for (int x = 0; x < Width(); ++x)
 636       ASSERT_EQ(lookup(out, y * kOutputStride + x),
 637                 lookup(in, y * kInputStride + x))
 638           << "(" << x << "," << y << ")";
 639 }
 640
 641 TEST_P(ConvolveTest, Copy2D) {
 642   uint8_t* const in = input();
 643   uint8_t* const out = output();
 644   DECLARE_ALIGNED(256, const int16_t, filter8[8]) = {0, 0, 0, 128, 0, 0, 0, 0};
 645
 646   ASM_REGISTER_STATE_CHECK(
 647       UUT_->shv8_(in, kInputStride, out, kOutputStride, filter8, 16, filter8,
 648                   16, Width(), Height()));
 649
 650   CheckGuardBlocks();
 651
 652   for (int y = 0; y < Height(); ++y)
 653     for (int x = 0; x < Width(); ++x)
 654       ASSERT_EQ(lookup(out, y * kOutputStride + x),
 655                 lookup(in, y * kInputStride + x))
 656           << "(" << x << "," << y << ")";
 657 }
 658
// Iteration bounds used by the tests below: the number of entries of
// vp9_filter_kernels that are visited, and the number of kernels visited in
// each of them.
const int kNumFilterBanks = 4;
const int kNumFilters = 16;
 661
 662 TEST(ConvolveTest, FiltersWontSaturateWhenAddedPairwise) {
 663   for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
 664     const InterpKernel *filters =
 665         vp9_filter_kernels[static_cast<INTERP_FILTER>(filter_bank)];
 666     for (int i = 0; i < kNumFilters; i++) {
 667       const int p0 = filters[i][0] + filters[i][1];
 668       const int p1 = filters[i][2] + filters[i][3];
 669       const int p2 = filters[i][4] + filters[i][5];
 670       const int p3 = filters[i][6] + filters[i][7];
 671       EXPECT_LE(p0, 128);
 672       EXPECT_LE(p1, 128);
 673       EXPECT_LE(p2, 128);
 674       EXPECT_LE(p3, 128);
 675       EXPECT_LE(p0 + p3, 128);
 676       EXPECT_LE(p0 + p3 + p1, 128);
 677       EXPECT_LE(p0 + p3 + p1 + p2, 128);
 678       EXPECT_EQ(p0 + p1 + p2 + p3, 128);
 679     }
 680   }
 681 }
 682
// All-zero kernel passed for the filter argument of the direction a
// one-dimensional (or copy/avg) function under test is not asked to filter.
const int16_t kInvalidFilter[8] = { 0 };
 684
 685 TEST_P(ConvolveTest, MatchesReferenceSubpixelFilter) {
 686   uint8_t* const in = input();
 687   uint8_t* const out = output();
 688 #if CONFIG_VP9_HIGHBITDEPTH
 689   uint8_t ref8[kOutputStride * kMaxDimension];
 690   uint16_t ref16[kOutputStride * kMaxDimension];
 691   uint8_t* ref;
 692   if (UUT_->use_highbd_ == 0) {
 693     ref = ref8;
 694   } else {
 695     ref = CONVERT_TO_BYTEPTR(ref16);
 696   }
 697 #else
 698   uint8_t ref[kOutputStride * kMaxDimension];
 699 #endif
 700
 701   for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
 702     const InterpKernel *filters =
 703         vp9_filter_kernels[static_cast<INTERP_FILTER>(filter_bank)];
 704
 705     for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
 706       for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
 707         wrapper_filter_block2d_8_c(in, kInputStride,
 708                                    filters[filter_x], filters[filter_y],
 709                                    ref, kOutputStride,
 710                                    Width(), Height());
 711
 712         if (filter_x && filter_y)
 713           ASM_REGISTER_STATE_CHECK(
 714               UUT_->hv8_(in, kInputStride, out, kOutputStride,
 715                          filters[filter_x], 16, filters[filter_y], 16,
 716                          Width(), Height()));
 717         else if (filter_y)
 718           ASM_REGISTER_STATE_CHECK(
 719               UUT_->v8_(in, kInputStride, out, kOutputStride,
 720                         kInvalidFilter, 16, filters[filter_y], 16,
 721                         Width(), Height()));
 722         else if (filter_x)
 723           ASM_REGISTER_STATE_CHECK(
 724               UUT_->h8_(in, kInputStride, out, kOutputStride,
 725                         filters[filter_x], 16, kInvalidFilter, 16,
 726                         Width(), Height()));
 727         else
 728           ASM_REGISTER_STATE_CHECK(
 729               UUT_->copy_(in, kInputStride, out, kOutputStride,
 730                           kInvalidFilter, 0, kInvalidFilter, 0,
 731                           Width(), Height()));
 732
 733         CheckGuardBlocks();
 734
 735         for (int y = 0; y < Height(); ++y)
 736           for (int x = 0; x < Width(); ++x)
 737             ASSERT_EQ(lookup(ref, y * kOutputStride + x),
 738                       lookup(out, y * kOutputStride + x))
 739                 << "mismatch at (" << x << "," << y << "), "
 740                 << "filters (" << filter_bank << ","
 741                 << filter_x << "," << filter_y << ")";
 742       }
 743     }
 744   }
 745 }
 746
 747 TEST_P(ConvolveTest, MatchesReferenceAveragingSubpixelFilter) {
 748   uint8_t* const in = input();
 749   uint8_t* const out = output();
 750 #if CONFIG_VP9_HIGHBITDEPTH
 751   uint8_t ref8[kOutputStride * kMaxDimension];
 752   uint16_t ref16[kOutputStride * kMaxDimension];
 753   uint8_t* ref;
 754   if (UUT_->use_highbd_ == 0) {
 755     ref = ref8;
 756   } else {
 757     ref = CONVERT_TO_BYTEPTR(ref16);
 758   }
 759 #else
 760   uint8_t ref[kOutputStride * kMaxDimension];
 761 #endif
 762
 763   // Populate ref and out with some random data
 764   ::libvpx_test::ACMRandom prng;
 765   for (int y = 0; y < Height(); ++y) {
 766     for (int x = 0; x < Width(); ++x) {
 767       uint16_t r;
 768 #if CONFIG_VP9_HIGHBITDEPTH
 769       if (UUT_->use_highbd_ == 0 || UUT_->use_highbd_ == 8) {
 770         r = prng.Rand8Extremes();
 771       } else {
 772         r = prng.Rand16() & mask_;
 773       }
 774 #else
 775       r = prng.Rand8Extremes();
 776 #endif
 777
 778       assign_val(out, y * kOutputStride + x, r);
 779       assign_val(ref, y * kOutputStride + x, r);
 780     }
 781   }
 782
 783   for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
 784     const InterpKernel *filters =
 785         vp9_filter_kernels[static_cast<INTERP_FILTER>(filter_bank)];
 786
 787     for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
 788       for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
 789         wrapper_filter_average_block2d_8_c(in, kInputStride,
 790                                            filters[filter_x], filters[filter_y],
 791                                            ref, kOutputStride,
 792                                            Width(), Height());
 793
 794         if (filter_x && filter_y)
 795           ASM_REGISTER_STATE_CHECK(
 796               UUT_->hv8_avg_(in, kInputStride, out, kOutputStride,
 797                              filters[filter_x], 16, filters[filter_y], 16,
 798                              Width(), Height()));
 799         else if (filter_y)
 800           ASM_REGISTER_STATE_CHECK(
 801               UUT_->v8_avg_(in, kInputStride, out, kOutputStride,
 802                             kInvalidFilter, 16, filters[filter_y], 16,
 803                             Width(), Height()));
 804         else if (filter_x)
 805           ASM_REGISTER_STATE_CHECK(
 806               UUT_->h8_avg_(in, kInputStride, out, kOutputStride,
 807                             filters[filter_x], 16, kInvalidFilter, 16,
 808                             Width(), Height()));
 809         else
 810           ASM_REGISTER_STATE_CHECK(
 811               UUT_->avg_(in, kInputStride, out, kOutputStride,
 812                           kInvalidFilter, 0, kInvalidFilter, 0,
 813                           Width(), Height()));
 814
 815         CheckGuardBlocks();
 816
 817         for (int y = 0; y < Height(); ++y)
 818           for (int x = 0; x < Width(); ++x)
 819             ASSERT_EQ(lookup(ref, y * kOutputStride + x),
 820                       lookup(out, y * kOutputStride + x))
 821                 << "mismatch at (" << x << "," << y << "), "
 822                 << "filters (" << filter_bank << ","
 823                 << filter_x << "," << filter_y << ")";
 824       }
 825     }
 826   }
 827 }
 828
 829 TEST_P(ConvolveTest, FilterExtremes) {
 830   uint8_t *const in = input();
 831   uint8_t *const out = output();
 832 #if CONFIG_VP9_HIGHBITDEPTH
 833   uint8_t ref8[kOutputStride * kMaxDimension];
 834   uint16_t ref16[kOutputStride * kMaxDimension];
 835   uint8_t *ref;
 836   if (UUT_->use_highbd_ == 0) {
 837     ref = ref8;
 838   } else {
 839     ref = CONVERT_TO_BYTEPTR(ref16);
 840   }
 841 #else
 842   uint8_t ref[kOutputStride * kMaxDimension];
 843 #endif
 844
 845   // Populate ref and out with some random data
 846   ::libvpx_test::ACMRandom prng;
 847   for (int y = 0; y < Height(); ++y) {
 848     for (int x = 0; x < Width(); ++x) {
 849       uint16_t r;
 850 #if CONFIG_VP9_HIGHBITDEPTH
 851       if (UUT_->use_highbd_ == 0 || UUT_->use_highbd_ == 8) {
 852         r = prng.Rand8Extremes();
 853       } else {
 854         r = prng.Rand16() & mask_;
 855       }
 856 #else
 857       r = prng.Rand8Extremes();
 858 #endif
 859       assign_val(out, y * kOutputStride + x, r);
 860       assign_val(ref, y * kOutputStride + x, r);
 861     }
 862   }
 863
 864   for (int axis = 0; axis < 2; axis++) {
 865     int seed_val = 0;
 866     while (seed_val < 256) {
 867       for (int y = 0; y < 8; ++y) {
 868         for (int x = 0; x < 8; ++x) {
 869 #if CONFIG_VP9_HIGHBITDEPTH
 870             assign_val(in, y * kOutputStride + x - SUBPEL_TAPS / 2 + 1,
 871                        ((seed_val >> (axis ? y : x)) & 1) * mask_);
 872 #else
 873             assign_val(in, y * kOutputStride + x - SUBPEL_TAPS / 2 + 1,
 874                        ((seed_val >> (axis ? y : x)) & 1) * 255);
 875 #endif
 876           if (axis) seed_val++;
 877         }
 878         if (axis)
 879           seed_val-= 8;
 880         else
 881           seed_val++;
 882       }
 883       if (axis) seed_val += 8;
 884
 885       for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
 886         const InterpKernel *filters =
 887             vp9_filter_kernels[static_cast<INTERP_FILTER>(filter_bank)];
 888         for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
 889           for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
 890             wrapper_filter_block2d_8_c(in, kInputStride,
 891                                        filters[filter_x], filters[filter_y],
 892                                        ref, kOutputStride,
 893                                        Width(), Height());
 894             if (filter_x && filter_y)
 895               ASM_REGISTER_STATE_CHECK(
 896                   UUT_->hv8_(in, kInputStride, out, kOutputStride,
 897                              filters[filter_x], 16, filters[filter_y], 16,
 898                              Width(), Height()));
 899             else if (filter_y)
 900               ASM_REGISTER_STATE_CHECK(
 901                   UUT_->v8_(in, kInputStride, out, kOutputStride,
 902                             kInvalidFilter, 16, filters[filter_y], 16,
 903                             Width(), Height()));
 904             else if (filter_x)
 905               ASM_REGISTER_STATE_CHECK(
 906                   UUT_->h8_(in, kInputStride, out, kOutputStride,
 907                             filters[filter_x], 16, kInvalidFilter, 16,
 908                             Width(), Height()));
 909             else
 910               ASM_REGISTER_STATE_CHECK(
 911                   UUT_->copy_(in, kInputStride, out, kOutputStride,
 912                               kInvalidFilter, 0, kInvalidFilter, 0,
 913                               Width(), Height()));
 914
 915             for (int y = 0; y < Height(); ++y)
 916               for (int x = 0; x < Width(); ++x)
 917                 ASSERT_EQ(lookup(ref, y * kOutputStride + x),
 918                           lookup(out, y * kOutputStride + x))
 919                     << "mismatch at (" << x << "," << y << "), "
 920                     << "filters (" << filter_bank << ","
 921                     << filter_x << "," << filter_y << ")";
 922           }
 923         }
 924       }
 925     }
 926   }
 927 }
 928
 929 /* This test exercises that enough rows and columns are filtered with every
 930    possible initial fractional positions and scaling steps. */
 931 TEST_P(ConvolveTest, CheckScalingFiltering) {
 932   uint8_t* const in = input();
 933   uint8_t* const out = output();
 934   const InterpKernel *const eighttap = vp9_filter_kernels[EIGHTTAP];
 935
 936   SetConstantInput(127);
 937
 938   for (int frac = 0; frac < 16; ++frac) {
 939     for (int step = 1; step <= 32; ++step) {
 940       /* Test the horizontal and vertical filters in combination. */
 941       ASM_REGISTER_STATE_CHECK(UUT_->shv8_(in, kInputStride, out, kOutputStride,
 942                                            eighttap[frac], step,
 943                                            eighttap[frac], step,
 944                                            Width(), Height()));
 945
 946       CheckGuardBlocks();
 947
 948       for (int y = 0; y < Height(); ++y) {
 949         for (int x = 0; x < Width(); ++x) {
 950           ASSERT_EQ(lookup(in, y * kInputStride + x),
 951                     lookup(out, y * kOutputStride + x))
 952               << "x == " << x << ", y == " << y
 953               << ", frac == " << frac << ", step == " << step;
 954         }
 955       }
 956     }
 957   }
 958 }
 959
 960 using std::tr1::make_tuple;
 961
 962 #if CONFIG_VP9_HIGHBITDEPTH
 963 #if HAVE_SSE2 && ARCH_X86_64
 964 void wrap_convolve8_horiz_sse2_8(const uint8_t *src, ptrdiff_t src_stride,
 965                                  uint8_t *dst, ptrdiff_t dst_stride,
 966                                  const int16_t *filter_x,
 967                                  int filter_x_stride,
 968                                  const int16_t *filter_y,
 969                                  int filter_y_stride,
 970                                  int w, int h) {
 971   vpx_highbd_convolve8_horiz_sse2(src, src_stride, dst, dst_stride, filter_x,
 972                                   filter_x_stride, filter_y, filter_y_stride,
 973                                   w, h, 8);
 974 }
 975
 976 void wrap_convolve8_avg_horiz_sse2_8(const uint8_t *src, ptrdiff_t src_stride,
 977                                      uint8_t *dst, ptrdiff_t dst_stride,
 978                                      const int16_t *filter_x,
 979                                      int filter_x_stride,
 980                                      const int16_t *filter_y,
 981                                      int filter_y_stride,
 982                                      int w, int h) {
 983   vpx_highbd_convolve8_avg_horiz_sse2(src, src_stride, dst, dst_stride,
 984                                       filter_x, filter_x_stride,
 985                                       filter_y, filter_y_stride, w, h, 8);
 986 }
 987
 988 void wrap_convolve8_vert_sse2_8(const uint8_t *src, ptrdiff_t src_stride,
 989                                 uint8_t *dst, ptrdiff_t dst_stride,
 990                                 const int16_t *filter_x,
 991                                 int filter_x_stride,
 992                                 const int16_t *filter_y,
 993                                 int filter_y_stride,
 994                                 int w, int h) {
 995   vpx_highbd_convolve8_vert_sse2(src, src_stride, dst, dst_stride,
 996                                  filter_x, filter_x_stride,
 997                                  filter_y, filter_y_stride, w, h, 8);
 998 }
 999
1000 void wrap_convolve8_avg_vert_sse2_8(const uint8_t *src, ptrdiff_t src_stride,
1001                                     uint8_t *dst, ptrdiff_t dst_stride,
1002                                     const int16_t *filter_x,
1003                                     int filter_x_stride,
1004                                     const int16_t *filter_y,
1005                                     int filter_y_stride,
1006                                     int w, int h) {
1007   vpx_highbd_convolve8_avg_vert_sse2(src, src_stride, dst, dst_stride,
1008                                      filter_x, filter_x_stride,
1009                                      filter_y, filter_y_stride, w, h, 8);
1010 }
1011
1012 void wrap_convolve8_sse2_8(const uint8_t *src, ptrdiff_t src_stride,
1013                            uint8_t *dst, ptrdiff_t dst_stride,
1014                            const int16_t *filter_x,
1015                            int filter_x_stride,
1016                            const int16_t *filter_y,
1017                            int filter_y_stride,
1018                            int w, int h) {
1019   vpx_highbd_convolve8_sse2(src, src_stride, dst, dst_stride,
1020                             filter_x, filter_x_stride,
1021                             filter_y, filter_y_stride, w, h, 8);
1022 }
1023
1024 void wrap_convolve8_avg_sse2_8(const uint8_t *src, ptrdiff_t src_stride,
1025                                uint8_t *dst, ptrdiff_t dst_stride,
1026                                const int16_t *filter_x,
1027                                int filter_x_stride,
1028                                const int16_t *filter_y,
1029                                int filter_y_stride,
1030                                int w, int h) {
1031   vpx_highbd_convolve8_avg_sse2(src, src_stride, dst, dst_stride,
1032                                 filter_x, filter_x_stride,
1033                                 filter_y, filter_y_stride, w, h, 8);
1034 }
1035
1036 void wrap_convolve8_horiz_sse2_10(const uint8_t *src, ptrdiff_t src_stride,
1037                                   uint8_t *dst, ptrdiff_t dst_stride,
1038                                   const int16_t *filter_x,
1039                                   int filter_x_stride,
1040                                   const int16_t *filter_y,
1041                                   int filter_y_stride,
1042                                   int w, int h) {
1043   vpx_highbd_convolve8_horiz_sse2(src, src_stride, dst, dst_stride,
1044                                   filter_x, filter_x_stride,
1045                                   filter_y, filter_y_stride, w, h, 10);
1046 }
1047
1048 void wrap_convolve8_avg_horiz_sse2_10(const uint8_t *src, ptrdiff_t src_stride,
1049                                       uint8_t *dst, ptrdiff_t dst_stride,
1050                                       const int16_t *filter_x,
1051                                       int filter_x_stride,
1052                                       const int16_t *filter_y,
1053                                       int filter_y_stride,
1054                                       int w, int h) {
1055   vpx_highbd_convolve8_avg_horiz_sse2(src, src_stride, dst, dst_stride,
1056                                       filter_x, filter_x_stride,
1057                                       filter_y, filter_y_stride, w, h, 10);
1058 }
1059
1060 void wrap_convolve8_vert_sse2_10(const uint8_t *src, ptrdiff_t src_stride,
1061                                  uint8_t *dst, ptrdiff_t dst_stride,
1062                                  const int16_t *filter_x,
1063                                  int filter_x_stride,
1064                                  const int16_t *filter_y,
1065                                  int filter_y_stride,
1066                                  int w, int h) {
1067   vpx_highbd_convolve8_vert_sse2(src, src_stride, dst, dst_stride,
1068                                  filter_x, filter_x_stride,
1069                                  filter_y, filter_y_stride, w, h, 10);
1070 }
1071
1072 void wrap_convolve8_avg_vert_sse2_10(const uint8_t *src, ptrdiff_t src_stride,
1073                                      uint8_t *dst, ptrdiff_t dst_stride,
1074                                      const int16_t *filter_x,
1075                                      int filter_x_stride,
1076                                      const int16_t *filter_y,
1077                                      int filter_y_stride,
1078                                      int w, int h) {
1079   vpx_highbd_convolve8_avg_vert_sse2(src, src_stride, dst, dst_stride,
1080                                      filter_x, filter_x_stride,
1081                                      filter_y, filter_y_stride, w, h, 10);
1082 }
1083
1084 void wrap_convolve8_sse2_10(const uint8_t *src, ptrdiff_t src_stride,
1085                             uint8_t *dst, ptrdiff_t dst_stride,
1086                             const int16_t *filter_x,
1087                             int filter_x_stride,
1088                             const int16_t *filter_y,
1089                             int filter_y_stride,
1090                             int w, int h) {
1091   vpx_highbd_convolve8_sse2(src, src_stride, dst, dst_stride,
1092                             filter_x, filter_x_stride,
1093                             filter_y, filter_y_stride, w, h, 10);
1094 }
1095
1096 void wrap_convolve8_avg_sse2_10(const uint8_t *src, ptrdiff_t src_stride,
1097                                 uint8_t *dst, ptrdiff_t dst_stride,
1098                                 const int16_t *filter_x,
1099                                 int filter_x_stride,
1100                                 const int16_t *filter_y,
1101                                 int filter_y_stride,
1102                                 int w, int h) {
1103   vpx_highbd_convolve8_avg_sse2(src, src_stride, dst, dst_stride,
1104                                 filter_x, filter_x_stride,
1105                                 filter_y, filter_y_stride, w, h, 10);
1106 }
1107
1108 void wrap_convolve8_horiz_sse2_12(const uint8_t *src, ptrdiff_t src_stride,
1109                                   uint8_t *dst, ptrdiff_t dst_stride,
1110                                   const int16_t *filter_x,
1111                                   int filter_x_stride,
1112                                   const int16_t *filter_y,
1113                                   int filter_y_stride,
1114                                   int w, int h) {
1115   vpx_highbd_convolve8_horiz_sse2(src, src_stride, dst, dst_stride,
1116                                   filter_x, filter_x_stride,
1117                                   filter_y, filter_y_stride, w, h, 12);
1118 }
1119
1120 void wrap_convolve8_avg_horiz_sse2_12(const uint8_t *src, ptrdiff_t src_stride,
1121                                       uint8_t *dst, ptrdiff_t dst_stride,
1122                                       const int16_t *filter_x,
1123                                       int filter_x_stride,
1124                                       const int16_t *filter_y,
1125                                       int filter_y_stride,
1126                                       int w, int h) {
1127   vpx_highbd_convolve8_avg_horiz_sse2(src, src_stride, dst, dst_stride,
1128                                       filter_x, filter_x_stride,
1129                                       filter_y, filter_y_stride, w, h, 12);
1130 }
1131
1132 void wrap_convolve8_vert_sse2_12(const uint8_t *src, ptrdiff_t src_stride,
1133                                  uint8_t *dst, ptrdiff_t dst_stride,
1134                                  const int16_t *filter_x,
1135                                  int filter_x_stride,
1136                                  const int16_t *filter_y,
1137                                  int filter_y_stride,
1138                                  int w, int h) {
1139   vpx_highbd_convolve8_vert_sse2(src, src_stride, dst, dst_stride,
1140                                  filter_x, filter_x_stride,
1141                                  filter_y, filter_y_stride, w, h, 12);
1142 }
1143
1144 void wrap_convolve8_avg_vert_sse2_12(const uint8_t *src, ptrdiff_t src_stride,
1145                                      uint8_t *dst, ptrdiff_t dst_stride,
1146                                      const int16_t *filter_x,
1147                                      int filter_x_stride,
1148                                      const int16_t *filter_y,
1149                                      int filter_y_stride,
1150                                      int w, int h) {
1151   vpx_highbd_convolve8_avg_vert_sse2(src, src_stride, dst, dst_stride,
1152                                      filter_x, filter_x_stride,
1153                                      filter_y, filter_y_stride, w, h, 12);
1154 }
1155
1156 void wrap_convolve8_sse2_12(const uint8_t *src, ptrdiff_t src_stride,
1157                             uint8_t *dst, ptrdiff_t dst_stride,
1158                             const int16_t *filter_x,
1159                             int filter_x_stride,
1160                             const int16_t *filter_y,
1161                             int filter_y_stride,
1162                             int w, int h) {
1163   vpx_highbd_convolve8_sse2(src, src_stride, dst, dst_stride,
1164                             filter_x, filter_x_stride,
1165                             filter_y, filter_y_stride, w, h, 12);
1166 }
1167
1168 void wrap_convolve8_avg_sse2_12(const uint8_t *src, ptrdiff_t src_stride,
1169                                 uint8_t *dst, ptrdiff_t dst_stride,
1170                                 const int16_t *filter_x,
1171                                 int filter_x_stride,
1172                                 const int16_t *filter_y,
1173                                 int filter_y_stride,
1174                                 int w, int h) {
1175   vpx_highbd_convolve8_avg_sse2(src, src_stride, dst, dst_stride,
1176                                 filter_x, filter_x_stride,
1177                                 filter_y, filter_y_stride, w, h, 12);
1178 }
1179 #endif  // HAVE_SSE2 && ARCH_X86_64
1180
1181 void wrap_convolve_copy_c_8(const uint8_t *src, ptrdiff_t src_stride,
1182                             uint8_t *dst, ptrdiff_t dst_stride,
1183                             const int16_t *filter_x,
1184                             int filter_x_stride,
1185                             const int16_t *filter_y,
1186                             int filter_y_stride,
1187                             int w, int h) {
1188   vpx_highbd_convolve_copy_c(src, src_stride, dst, dst_stride,
1189                              filter_x, filter_x_stride,
1190                              filter_y, filter_y_stride, w, h, 8);
1191 }
1192
1193 void wrap_convolve_avg_c_8(const uint8_t *src, ptrdiff_t src_stride,
1194                            uint8_t *dst, ptrdiff_t dst_stride,
1195                            const int16_t *filter_x,
1196                            int filter_x_stride,
1197                            const int16_t *filter_y,
1198                            int filter_y_stride,
1199                            int w, int h) {
1200   vpx_highbd_convolve_avg_c(src, src_stride, dst, dst_stride,
1201                             filter_x, filter_x_stride,
1202                             filter_y, filter_y_stride, w, h, 8);
1203 }
1204
1205 void wrap_convolve8_horiz_c_8(const uint8_t *src, ptrdiff_t src_stride,
1206                               uint8_t *dst, ptrdiff_t dst_stride,
1207                               const int16_t *filter_x,
1208                               int filter_x_stride,
1209                               const int16_t *filter_y,
1210                               int filter_y_stride,
1211                               int w, int h) {
1212   vpx_highbd_convolve8_horiz_c(src, src_stride, dst, dst_stride,
1213                                filter_x, filter_x_stride,
1214                                filter_y, filter_y_stride, w, h, 8);
1215 }
1216
1217 void wrap_convolve8_avg_horiz_c_8(const uint8_t *src, ptrdiff_t src_stride,
1218                                   uint8_t *dst, ptrdiff_t dst_stride,
1219                                   const int16_t *filter_x,
1220                                   int filter_x_stride,
1221                                   const int16_t *filter_y,
1222                                   int filter_y_stride,
1223                                   int w, int h) {
1224   vpx_highbd_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride,
1225                                    filter_x, filter_x_stride,
1226                                    filter_y, filter_y_stride, w, h, 8);
1227 }
1228
1229 void wrap_convolve8_vert_c_8(const uint8_t *src, ptrdiff_t src_stride,
1230                              uint8_t *dst, ptrdiff_t dst_stride,
1231                              const int16_t *filter_x,
1232                              int filter_x_stride,
1233                              const int16_t *filter_y,
1234                              int filter_y_stride,
1235                              int w, int h) {
1236   vpx_highbd_convolve8_vert_c(src, src_stride, dst, dst_stride,
1237                               filter_x, filter_x_stride,
1238                               filter_y, filter_y_stride, w, h, 8);
1239 }
1240
1241 void wrap_convolve8_avg_vert_c_8(const uint8_t *src, ptrdiff_t src_stride,
1242                                  uint8_t *dst, ptrdiff_t dst_stride,
1243                                  const int16_t *filter_x,
1244                                  int filter_x_stride,
1245                                  const int16_t *filter_y,
1246                                  int filter_y_stride,
1247                                  int w, int h) {
1248   vpx_highbd_convolve8_avg_vert_c(src, src_stride, dst, dst_stride,
1249                                   filter_x, filter_x_stride,
1250                                   filter_y, filter_y_stride, w, h, 8);
1251 }
1252
1253 void wrap_convolve8_c_8(const uint8_t *src, ptrdiff_t src_stride,
1254                         uint8_t *dst, ptrdiff_t dst_stride,
1255                         const int16_t *filter_x,
1256                         int filter_x_stride,
1257                         const int16_t *filter_y,
1258                         int filter_y_stride,
1259                         int w, int h) {
1260   vpx_highbd_convolve8_c(src, src_stride, dst, dst_stride,
1261                          filter_x, filter_x_stride,
1262                          filter_y, filter_y_stride, w, h, 8);
1263 }
1264
1265 void wrap_convolve8_avg_c_8(const uint8_t *src, ptrdiff_t src_stride,
1266                             uint8_t *dst, ptrdiff_t dst_stride,
1267                             const int16_t *filter_x,
1268                             int filter_x_stride,
1269                             const int16_t *filter_y,
1270                             int filter_y_stride,
1271                             int w, int h) {
1272   vpx_highbd_convolve8_avg_c(src, src_stride, dst, dst_stride,
1273                              filter_x, filter_x_stride,
1274                              filter_y, filter_y_stride, w, h, 8);
1275 }
1276
1277 void wrap_convolve_copy_c_10(const uint8_t *src, ptrdiff_t src_stride,
1278                              uint8_t *dst, ptrdiff_t dst_stride,
1279                              const int16_t *filter_x,
1280                              int filter_x_stride,
1281                              const int16_t *filter_y,
1282                              int filter_y_stride,
1283                              int w, int h) {
1284   vpx_highbd_convolve_copy_c(src, src_stride, dst, dst_stride,
1285                              filter_x, filter_x_stride,
1286                              filter_y, filter_y_stride, w, h, 10);
1287 }
1288
1289 void wrap_convolve_avg_c_10(const uint8_t *src, ptrdiff_t src_stride,
1290                             uint8_t *dst, ptrdiff_t dst_stride,
1291                             const int16_t *filter_x,
1292                             int filter_x_stride,
1293                             const int16_t *filter_y,
1294                             int filter_y_stride,
1295                             int w, int h) {
1296   vpx_highbd_convolve_avg_c(src, src_stride, dst, dst_stride,
1297                             filter_x, filter_x_stride,
1298                             filter_y, filter_y_stride, w, h, 10);
1299 }
1300
1301 void wrap_convolve8_horiz_c_10(const uint8_t *src, ptrdiff_t src_stride,
1302                                uint8_t *dst, ptrdiff_t dst_stride,
1303                                const int16_t *filter_x,
1304                                int filter_x_stride,
1305                                const int16_t *filter_y,
1306                                int filter_y_stride,
1307                                int w, int h) {
1308   vpx_highbd_convolve8_horiz_c(src, src_stride, dst, dst_stride,
1309                                filter_x, filter_x_stride,
1310                                filter_y, filter_y_stride, w, h, 10);
1311 }
1312
1313 void wrap_convolve8_avg_horiz_c_10(const uint8_t *src, ptrdiff_t src_stride,
1314                                    uint8_t *dst, ptrdiff_t dst_stride,
1315                                    const int16_t *filter_x,
1316                                    int filter_x_stride,
1317                                    const int16_t *filter_y,
1318                                    int filter_y_stride,
1319                                    int w, int h) {
1320   vpx_highbd_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride,
1321                                    filter_x, filter_x_stride,
1322                                    filter_y, filter_y_stride, w, h, 10);
1323 }
1324
1325 void wrap_convolve8_vert_c_10(const uint8_t *src, ptrdiff_t src_stride,
1326                               uint8_t *dst, ptrdiff_t dst_stride,
1327                               const int16_t *filter_x,
1328                               int filter_x_stride,
1329                               const int16_t *filter_y,
1330                               int filter_y_stride,
1331                               int w, int h) {
1332   vpx_highbd_convolve8_vert_c(src, src_stride, dst, dst_stride,
1333                               filter_x, filter_x_stride,
1334                               filter_y, filter_y_stride, w, h, 10);
1335 }
1336
1337 void wrap_convolve8_avg_vert_c_10(const uint8_t *src, ptrdiff_t src_stride,
1338                                   uint8_t *dst, ptrdiff_t dst_stride,
1339                                   const int16_t *filter_x,
1340                                   int filter_x_stride,
1341                                   const int16_t *filter_y,
1342                                   int filter_y_stride,
1343                                   int w, int h) {
1344   vpx_highbd_convolve8_avg_vert_c(src, src_stride, dst, dst_stride,
1345                                   filter_x, filter_x_stride,
1346                                   filter_y, filter_y_stride, w, h, 10);
1347 }
1348
1349 void wrap_convolve8_c_10(const uint8_t *src, ptrdiff_t src_stride,
1350                          uint8_t *dst, ptrdiff_t dst_stride,
1351                          const int16_t *filter_x,
1352                          int filter_x_stride,
1353                          const int16_t *filter_y,
1354                          int filter_y_stride,
1355                          int w, int h) {
1356   vpx_highbd_convolve8_c(src, src_stride, dst, dst_stride,
1357                          filter_x, filter_x_stride,
1358                          filter_y, filter_y_stride, w, h, 10);
1359 }
1360
1361 void wrap_convolve8_avg_c_10(const uint8_t *src, ptrdiff_t src_stride,
1362                              uint8_t *dst, ptrdiff_t dst_stride,
1363                              const int16_t *filter_x,
1364                              int filter_x_stride,
1365                              const int16_t *filter_y,
1366                              int filter_y_stride,
1367                              int w, int h) {
1368   vpx_highbd_convolve8_avg_c(src, src_stride, dst, dst_stride,
1369                              filter_x, filter_x_stride,
1370                              filter_y, filter_y_stride, w, h, 10);
1371 }
1372
1373 void wrap_convolve_copy_c_12(const uint8_t *src, ptrdiff_t src_stride,
1374                              uint8_t *dst, ptrdiff_t dst_stride,
1375                              const int16_t *filter_x,
1376                              int filter_x_stride,
1377                              const int16_t *filter_y,
1378                              int filter_y_stride,
1379                              int w, int h) {
1380   vpx_highbd_convolve_copy_c(src, src_stride, dst, dst_stride,
1381                              filter_x, filter_x_stride,
1382                              filter_y, filter_y_stride, w, h, 12);
1383 }
1384
1385 void wrap_convolve_avg_c_12(const uint8_t *src, ptrdiff_t src_stride,
1386                             uint8_t *dst, ptrdiff_t dst_stride,
1387                             const int16_t *filter_x,
1388                             int filter_x_stride,
1389                             const int16_t *filter_y,
1390                             int filter_y_stride,
1391                             int w, int h) {
1392   vpx_highbd_convolve_avg_c(src, src_stride, dst, dst_stride,
1393                             filter_x, filter_x_stride,
1394                             filter_y, filter_y_stride, w, h, 12);
1395 }
1396
1397 void wrap_convolve8_horiz_c_12(const uint8_t *src, ptrdiff_t src_stride,
1398                                uint8_t *dst, ptrdiff_t dst_stride,
1399                                const int16_t *filter_x,
1400                                int filter_x_stride,
1401                                const int16_t *filter_y,
1402                                int filter_y_stride,
1403                                int w, int h) {
1404   vpx_highbd_convolve8_horiz_c(src, src_stride, dst, dst_stride,
1405                                filter_x, filter_x_stride,
1406                                filter_y, filter_y_stride, w, h, 12);
1407 }
1408
1409 void wrap_convolve8_avg_horiz_c_12(const uint8_t *src, ptrdiff_t src_stride,
1410                                    uint8_t *dst, ptrdiff_t dst_stride,
1411                                    const int16_t *filter_x,
1412                                    int filter_x_stride,
1413                                    const int16_t *filter_y,
1414                                    int filter_y_stride,
1415                                    int w, int h) {
1416   vpx_highbd_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride,
1417                                    filter_x, filter_x_stride,
1418                                    filter_y, filter_y_stride, w, h, 12);
1419 }
1420
1421 void wrap_convolve8_vert_c_12(const uint8_t *src, ptrdiff_t src_stride,
1422                               uint8_t *dst, ptrdiff_t dst_stride,
1423                               const int16_t *filter_x,
1424                               int filter_x_stride,
1425                               const int16_t *filter_y,
1426                               int filter_y_stride,
1427                               int w, int h) {
1428   vpx_highbd_convolve8_vert_c(src, src_stride, dst, dst_stride,
1429                               filter_x, filter_x_stride,
1430                               filter_y, filter_y_stride, w, h, 12);
1431 }
1432
1433 void wrap_convolve8_avg_vert_c_12(const uint8_t *src, ptrdiff_t src_stride,
1434                                   uint8_t *dst, ptrdiff_t dst_stride,
1435                                   const int16_t *filter_x,
1436                                   int filter_x_stride,
1437                                   const int16_t *filter_y,
1438                                   int filter_y_stride,
1439                                   int w, int h) {
1440   vpx_highbd_convolve8_avg_vert_c(src, src_stride, dst, dst_stride,
1441                                   filter_x, filter_x_stride,
1442                                   filter_y, filter_y_stride, w, h, 12);
1443 }
1444
1445 void wrap_convolve8_c_12(const uint8_t *src, ptrdiff_t src_stride,
1446                          uint8_t *dst, ptrdiff_t dst_stride,
1447                          const int16_t *filter_x,
1448                          int filter_x_stride,
1449                          const int16_t *filter_y,
1450                          int filter_y_stride,
1451                          int w, int h) {
1452   vpx_highbd_convolve8_c(src, src_stride, dst, dst_stride,
1453                          filter_x, filter_x_stride,
1454                          filter_y, filter_y_stride, w, h, 12);
1455 }
1456
1457 void wrap_convolve8_avg_c_12(const uint8_t *src, ptrdiff_t src_stride,
1458                              uint8_t *dst, ptrdiff_t dst_stride,
1459                              const int16_t *filter_x,
1460                              int filter_x_stride,
1461                              const int16_t *filter_y,
1462                              int filter_y_stride,
1463                              int w, int h) {
1464   vpx_highbd_convolve8_avg_c(src, src_stride, dst, dst_stride,
1465                              filter_x, filter_x_stride,
1466                              filter_y, filter_y_stride, w, h, 12);
1467 }
1468
1469 const ConvolveFunctions convolve8_c(
1470     wrap_convolve_copy_c_8, wrap_convolve_avg_c_8,
1471     wrap_convolve8_horiz_c_8, wrap_convolve8_avg_horiz_c_8,
1472     wrap_convolve8_vert_c_8, wrap_convolve8_avg_vert_c_8,
1473     wrap_convolve8_c_8, wrap_convolve8_avg_c_8,
1474     wrap_convolve8_horiz_c_8, wrap_convolve8_avg_horiz_c_8,
1475     wrap_convolve8_vert_c_8, wrap_convolve8_avg_vert_c_8,
1476     wrap_convolve8_c_8, wrap_convolve8_avg_c_8, 8);
1477 INSTANTIATE_TEST_CASE_P(C_8, ConvolveTest, ::testing::Values(
1478     make_tuple(4, 4, &convolve8_c),
1479     make_tuple(8, 4, &convolve8_c),
1480     make_tuple(4, 8, &convolve8_c),
1481     make_tuple(8, 8, &convolve8_c),
1482     make_tuple(16, 8, &convolve8_c),
1483     make_tuple(8, 16, &convolve8_c),
1484     make_tuple(16, 16, &convolve8_c),
1485     make_tuple(32, 16, &convolve8_c),
1486     make_tuple(16, 32, &convolve8_c),
1487     make_tuple(32, 32, &convolve8_c),
1488     make_tuple(64, 32, &convolve8_c),
1489     make_tuple(32, 64, &convolve8_c),
1490     make_tuple(64, 64, &convolve8_c)));
1491 const ConvolveFunctions convolve10_c(
1492     wrap_convolve_copy_c_10, wrap_convolve_avg_c_10,
1493     wrap_convolve8_horiz_c_10, wrap_convolve8_avg_horiz_c_10,
1494     wrap_convolve8_vert_c_10, wrap_convolve8_avg_vert_c_10,
1495     wrap_convolve8_c_10, wrap_convolve8_avg_c_10,
1496     wrap_convolve8_horiz_c_10, wrap_convolve8_avg_horiz_c_10,
1497     wrap_convolve8_vert_c_10, wrap_convolve8_avg_vert_c_10,
1498     wrap_convolve8_c_10, wrap_convolve8_avg_c_10, 10);
1499 INSTANTIATE_TEST_CASE_P(C_10, ConvolveTest, ::testing::Values(
1500     make_tuple(4, 4, &convolve10_c),
1501     make_tuple(8, 4, &convolve10_c),
1502     make_tuple(4, 8, &convolve10_c),
1503     make_tuple(8, 8, &convolve10_c),
1504     make_tuple(16, 8, &convolve10_c),
1505     make_tuple(8, 16, &convolve10_c),
1506     make_tuple(16, 16, &convolve10_c),
1507     make_tuple(32, 16, &convolve10_c),
1508     make_tuple(16, 32, &convolve10_c),
1509     make_tuple(32, 32, &convolve10_c),
1510     make_tuple(64, 32, &convolve10_c),
1511     make_tuple(32, 64, &convolve10_c),
1512     make_tuple(64, 64, &convolve10_c)));
1513 const ConvolveFunctions convolve12_c(
1514     wrap_convolve_copy_c_12, wrap_convolve_avg_c_12,
1515     wrap_convolve8_horiz_c_12, wrap_convolve8_avg_horiz_c_12,
1516     wrap_convolve8_vert_c_12, wrap_convolve8_avg_vert_c_12,
1517     wrap_convolve8_c_12, wrap_convolve8_avg_c_12,
1518     wrap_convolve8_horiz_c_12, wrap_convolve8_avg_horiz_c_12,
1519     wrap_convolve8_vert_c_12, wrap_convolve8_avg_vert_c_12,
1520     wrap_convolve8_c_12, wrap_convolve8_avg_c_12, 12);
1521 INSTANTIATE_TEST_CASE_P(C_12, ConvolveTest, ::testing::Values(
1522     make_tuple(4, 4, &convolve12_c),
1523     make_tuple(8, 4, &convolve12_c),
1524     make_tuple(4, 8, &convolve12_c),
1525     make_tuple(8, 8, &convolve12_c),
1526     make_tuple(16, 8, &convolve12_c),
1527     make_tuple(8, 16, &convolve12_c),
1528     make_tuple(16, 16, &convolve12_c),
1529     make_tuple(32, 16, &convolve12_c),
1530     make_tuple(16, 32, &convolve12_c),
1531     make_tuple(32, 32, &convolve12_c),
1532     make_tuple(64, 32, &convolve12_c),
1533     make_tuple(32, 64, &convolve12_c),
1534     make_tuple(64, 64, &convolve12_c)));
1535
1536 #else
1537
1538 const ConvolveFunctions convolve8_c(
1539     vpx_convolve_copy_c, vpx_convolve_avg_c,
1540     vpx_convolve8_horiz_c, vpx_convolve8_avg_horiz_c,
1541     vpx_convolve8_vert_c, vpx_convolve8_avg_vert_c,
1542     vpx_convolve8_c, vpx_convolve8_avg_c,
1543     vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c,
1544     vpx_scaled_vert_c, vpx_scaled_avg_vert_c,
1545     vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
1546
1547 INSTANTIATE_TEST_CASE_P(C, ConvolveTest, ::testing::Values(
1548     make_tuple(4, 4, &convolve8_c),
1549     make_tuple(8, 4, &convolve8_c),
1550     make_tuple(4, 8, &convolve8_c),
1551     make_tuple(8, 8, &convolve8_c),
1552     make_tuple(16, 8, &convolve8_c),
1553     make_tuple(8, 16, &convolve8_c),
1554     make_tuple(16, 16, &convolve8_c),
1555     make_tuple(32, 16, &convolve8_c),
1556     make_tuple(16, 32, &convolve8_c),
1557     make_tuple(32, 32, &convolve8_c),
1558     make_tuple(64, 32, &convolve8_c),
1559     make_tuple(32, 64, &convolve8_c),
1560     make_tuple(64, 64, &convolve8_c)));
1561 #endif
1562
1563 #if HAVE_SSE2 && ARCH_X86_64
1564 #if CONFIG_VP9_HIGHBITDEPTH
1565 const ConvolveFunctions convolve8_sse2(
1566     wrap_convolve_copy_c_8, wrap_convolve_avg_c_8,
1567     wrap_convolve8_horiz_sse2_8, wrap_convolve8_avg_horiz_sse2_8,
1568     wrap_convolve8_vert_sse2_8, wrap_convolve8_avg_vert_sse2_8,
1569     wrap_convolve8_sse2_8, wrap_convolve8_avg_sse2_8,
1570     wrap_convolve8_horiz_sse2_8, wrap_convolve8_avg_horiz_sse2_8,
1571     wrap_convolve8_vert_sse2_8, wrap_convolve8_avg_vert_sse2_8,
1572     wrap_convolve8_sse2_8, wrap_convolve8_avg_sse2_8, 8);
1573 const ConvolveFunctions convolve10_sse2(
1574     wrap_convolve_copy_c_10, wrap_convolve_avg_c_10,
1575     wrap_convolve8_horiz_sse2_10, wrap_convolve8_avg_horiz_sse2_10,
1576     wrap_convolve8_vert_sse2_10, wrap_convolve8_avg_vert_sse2_10,
1577     wrap_convolve8_sse2_10, wrap_convolve8_avg_sse2_10,
1578     wrap_convolve8_horiz_sse2_10, wrap_convolve8_avg_horiz_sse2_10,
1579     wrap_convolve8_vert_sse2_10, wrap_convolve8_avg_vert_sse2_10,
1580     wrap_convolve8_sse2_10, wrap_convolve8_avg_sse2_10, 10);
1581 const ConvolveFunctions convolve12_sse2(
1582     wrap_convolve_copy_c_12, wrap_convolve_avg_c_12,
1583     wrap_convolve8_horiz_sse2_12, wrap_convolve8_avg_horiz_sse2_12,
1584     wrap_convolve8_vert_sse2_12, wrap_convolve8_avg_vert_sse2_12,
1585     wrap_convolve8_sse2_12, wrap_convolve8_avg_sse2_12,
1586     wrap_convolve8_horiz_sse2_12, wrap_convolve8_avg_horiz_sse2_12,
1587     wrap_convolve8_vert_sse2_12, wrap_convolve8_avg_vert_sse2_12,
1588     wrap_convolve8_sse2_12, wrap_convolve8_avg_sse2_12, 12);
1589 INSTANTIATE_TEST_CASE_P(SSE2, ConvolveTest, ::testing::Values(
1590     make_tuple(4, 4, &convolve8_sse2),
1591     make_tuple(8, 4, &convolve8_sse2),
1592     make_tuple(4, 8, &convolve8_sse2),
1593     make_tuple(8, 8, &convolve8_sse2),
1594     make_tuple(16, 8, &convolve8_sse2),
1595     make_tuple(8, 16, &convolve8_sse2),
1596     make_tuple(16, 16, &convolve8_sse2),
1597     make_tuple(32, 16, &convolve8_sse2),
1598     make_tuple(16, 32, &convolve8_sse2),
1599     make_tuple(32, 32, &convolve8_sse2),
1600     make_tuple(64, 32, &convolve8_sse2),
1601     make_tuple(32, 64, &convolve8_sse2),
1602     make_tuple(64, 64, &convolve8_sse2),
1603     make_tuple(4, 4, &convolve10_sse2),
1604     make_tuple(8, 4, &convolve10_sse2),
1605     make_tuple(4, 8, &convolve10_sse2),
1606     make_tuple(8, 8, &convolve10_sse2),
1607     make_tuple(16, 8, &convolve10_sse2),
1608     make_tuple(8, 16, &convolve10_sse2),
1609     make_tuple(16, 16, &convolve10_sse2),
1610     make_tuple(32, 16, &convolve10_sse2),
1611     make_tuple(16, 32, &convolve10_sse2),
1612     make_tuple(32, 32, &convolve10_sse2),
1613     make_tuple(64, 32, &convolve10_sse2),
1614     make_tuple(32, 64, &convolve10_sse2),
1615     make_tuple(64, 64, &convolve10_sse2),
1616     make_tuple(4, 4, &convolve12_sse2),
1617     make_tuple(8, 4, &convolve12_sse2),
1618     make_tuple(4, 8, &convolve12_sse2),
1619     make_tuple(8, 8, &convolve12_sse2),
1620     make_tuple(16, 8, &convolve12_sse2),
1621     make_tuple(8, 16, &convolve12_sse2),
1622     make_tuple(16, 16, &convolve12_sse2),
1623     make_tuple(32, 16, &convolve12_sse2),
1624     make_tuple(16, 32, &convolve12_sse2),
1625     make_tuple(32, 32, &convolve12_sse2),
1626     make_tuple(64, 32, &convolve12_sse2),
1627     make_tuple(32, 64, &convolve12_sse2),
1628     make_tuple(64, 64, &convolve12_sse2)));
1629 #else
1630 const ConvolveFunctions convolve8_sse2(
1631 #if CONFIG_USE_X86INC
1632     vpx_convolve_copy_sse2, vpx_convolve_avg_sse2,
1633 #else
1634     vpx_convolve_copy_c, vpx_convolve_avg_c,
1635 #endif  // CONFIG_USE_X86INC
1636     vpx_convolve8_horiz_sse2, vpx_convolve8_avg_horiz_sse2,
1637     vpx_convolve8_vert_sse2, vpx_convolve8_avg_vert_sse2,
1638     vpx_convolve8_sse2, vpx_convolve8_avg_sse2,
1639     vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c,
1640     vpx_scaled_vert_c, vpx_scaled_avg_vert_c,
1641     vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
1642
1643 INSTANTIATE_TEST_CASE_P(SSE2, ConvolveTest, ::testing::Values(
1644     make_tuple(4, 4, &convolve8_sse2),
1645     make_tuple(8, 4, &convolve8_sse2),
1646     make_tuple(4, 8, &convolve8_sse2),
1647     make_tuple(8, 8, &convolve8_sse2),
1648     make_tuple(16, 8, &convolve8_sse2),
1649     make_tuple(8, 16, &convolve8_sse2),
1650     make_tuple(16, 16, &convolve8_sse2),
1651     make_tuple(32, 16, &convolve8_sse2),
1652     make_tuple(16, 32, &convolve8_sse2),
1653     make_tuple(32, 32, &convolve8_sse2),
1654     make_tuple(64, 32, &convolve8_sse2),
1655     make_tuple(32, 64, &convolve8_sse2),
1656     make_tuple(64, 64, &convolve8_sse2)));
1657 #endif  // CONFIG_VP9_HIGHBITDEPTH
1658 #endif
1659
1660 #if HAVE_SSSE3
1661 const ConvolveFunctions convolve8_ssse3(
1662     vpx_convolve_copy_c, vpx_convolve_avg_c,
1663     vpx_convolve8_horiz_ssse3, vpx_convolve8_avg_horiz_ssse3,
1664     vpx_convolve8_vert_ssse3, vpx_convolve8_avg_vert_ssse3,
1665     vpx_convolve8_ssse3, vpx_convolve8_avg_ssse3,
1666     vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c,
1667     vpx_scaled_vert_c, vpx_scaled_avg_vert_c,
1668     vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
1669
1670 INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest, ::testing::Values(
1671     make_tuple(4, 4, &convolve8_ssse3),
1672     make_tuple(8, 4, &convolve8_ssse3),
1673     make_tuple(4, 8, &convolve8_ssse3),
1674     make_tuple(8, 8, &convolve8_ssse3),
1675     make_tuple(16, 8, &convolve8_ssse3),
1676     make_tuple(8, 16, &convolve8_ssse3),
1677     make_tuple(16, 16, &convolve8_ssse3),
1678     make_tuple(32, 16, &convolve8_ssse3),
1679     make_tuple(16, 32, &convolve8_ssse3),
1680     make_tuple(32, 32, &convolve8_ssse3),
1681     make_tuple(64, 32, &convolve8_ssse3),
1682     make_tuple(32, 64, &convolve8_ssse3),
1683     make_tuple(64, 64, &convolve8_ssse3)));
1684 #endif
1685
1686 #if HAVE_AVX2 && HAVE_SSSE3
1687 const ConvolveFunctions convolve8_avx2(
1688     vpx_convolve_copy_c, vpx_convolve_avg_c,
1689     vpx_convolve8_horiz_avx2, vpx_convolve8_avg_horiz_ssse3,
1690     vpx_convolve8_vert_avx2, vpx_convolve8_avg_vert_ssse3,
1691     vpx_convolve8_avx2, vpx_convolve8_avg_ssse3,
1692     vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c,
1693     vpx_scaled_vert_c, vpx_scaled_avg_vert_c,
1694     vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
1695
1696 INSTANTIATE_TEST_CASE_P(AVX2, ConvolveTest, ::testing::Values(
1697     make_tuple(4, 4, &convolve8_avx2),
1698     make_tuple(8, 4, &convolve8_avx2),
1699     make_tuple(4, 8, &convolve8_avx2),
1700     make_tuple(8, 8, &convolve8_avx2),
1701     make_tuple(8, 16, &convolve8_avx2),
1702     make_tuple(16, 8, &convolve8_avx2),
1703     make_tuple(16, 16, &convolve8_avx2),
1704     make_tuple(32, 16, &convolve8_avx2),
1705     make_tuple(16, 32, &convolve8_avx2),
1706     make_tuple(32, 32, &convolve8_avx2),
1707     make_tuple(64, 32, &convolve8_avx2),
1708     make_tuple(32, 64, &convolve8_avx2),
1709     make_tuple(64, 64, &convolve8_avx2)));
1710 #endif  // HAVE_AVX2 && HAVE_SSSE3
1711
1712 #if HAVE_NEON
1713 #if HAVE_NEON_ASM
1714 const ConvolveFunctions convolve8_neon(
1715     vpx_convolve_copy_neon, vpx_convolve_avg_neon,
1716     vpx_convolve8_horiz_neon, vpx_convolve8_avg_horiz_neon,
1717     vpx_convolve8_vert_neon, vpx_convolve8_avg_vert_neon,
1718     vpx_convolve8_neon, vpx_convolve8_avg_neon,
1719     vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c,
1720     vpx_scaled_vert_c, vpx_scaled_avg_vert_c,
1721     vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
1722 #else  // HAVE_NEON
1723 const ConvolveFunctions convolve8_neon(
1724     vpx_convolve_copy_neon, vpx_convolve_avg_neon,
1725     vpx_convolve8_horiz_neon, vpx_convolve8_avg_horiz_neon,
1726     vpx_convolve8_vert_neon, vpx_convolve8_avg_vert_neon,
1727     vpx_convolve8_neon, vpx_convolve8_avg_neon,
1728     vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c,
1729     vpx_scaled_vert_c, vpx_scaled_avg_vert_c,
1730     vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
1731 #endif  // HAVE_NEON_ASM
1732
1733 INSTANTIATE_TEST_CASE_P(NEON, ConvolveTest, ::testing::Values(
1734     make_tuple(4, 4, &convolve8_neon),
1735     make_tuple(8, 4, &convolve8_neon),
1736     make_tuple(4, 8, &convolve8_neon),
1737     make_tuple(8, 8, &convolve8_neon),
1738     make_tuple(16, 8, &convolve8_neon),
1739     make_tuple(8, 16, &convolve8_neon),
1740     make_tuple(16, 16, &convolve8_neon),
1741     make_tuple(32, 16, &convolve8_neon),
1742     make_tuple(16, 32, &convolve8_neon),
1743     make_tuple(32, 32, &convolve8_neon),
1744     make_tuple(64, 32, &convolve8_neon),
1745     make_tuple(32, 64, &convolve8_neon),
1746     make_tuple(64, 64, &convolve8_neon)));
1747 #endif  // HAVE_NEON
1748
1749 #if HAVE_DSPR2
1750 const ConvolveFunctions convolve8_dspr2(
1751     vpx_convolve_copy_dspr2, vpx_convolve_avg_dspr2,
1752     vpx_convolve8_horiz_dspr2, vpx_convolve8_avg_horiz_dspr2,
1753     vpx_convolve8_vert_dspr2, vpx_convolve8_avg_vert_dspr2,
1754     vpx_convolve8_dspr2, vpx_convolve8_avg_dspr2,
1755     vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c,
1756     vpx_scaled_vert_c, vpx_scaled_avg_vert_c,
1757     vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
1758
1759 INSTANTIATE_TEST_CASE_P(DSPR2, ConvolveTest, ::testing::Values(
1760     make_tuple(4, 4, &convolve8_dspr2),
1761     make_tuple(8, 4, &convolve8_dspr2),
1762     make_tuple(4, 8, &convolve8_dspr2),
1763     make_tuple(8, 8, &convolve8_dspr2),
1764     make_tuple(16, 8, &convolve8_dspr2),
1765     make_tuple(8, 16, &convolve8_dspr2),
1766     make_tuple(16, 16, &convolve8_dspr2),
1767     make_tuple(32, 16, &convolve8_dspr2),
1768     make_tuple(16, 32, &convolve8_dspr2),
1769     make_tuple(32, 32, &convolve8_dspr2),
1770     make_tuple(64, 32, &convolve8_dspr2),
1771     make_tuple(32, 64, &convolve8_dspr2),
1772     make_tuple(64, 64, &convolve8_dspr2)));
1773 #endif
1774
1775 #if HAVE_MSA
1776 const ConvolveFunctions convolve8_msa(
1777     vpx_convolve_copy_msa, vpx_convolve_avg_msa,
1778     vpx_convolve8_horiz_msa, vpx_convolve8_avg_horiz_msa,
1779     vpx_convolve8_vert_msa, vpx_convolve8_avg_vert_msa,
1780     vpx_convolve8_msa, vpx_convolve8_avg_msa,
1781     vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c,
1782     vpx_scaled_vert_c, vpx_scaled_avg_vert_c,
1783     vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
1784
1785 INSTANTIATE_TEST_CASE_P(MSA, ConvolveTest, ::testing::Values(
1786     make_tuple(4, 4, &convolve8_msa),
1787     make_tuple(8, 4, &convolve8_msa),
1788     make_tuple(4, 8, &convolve8_msa),
1789     make_tuple(8, 8, &convolve8_msa),
1790     make_tuple(16, 8, &convolve8_msa),
1791     make_tuple(8, 16, &convolve8_msa),
1792     make_tuple(16, 16, &convolve8_msa),
1793     make_tuple(32, 16, &convolve8_msa),
1794     make_tuple(16, 32, &convolve8_msa),
1795     make_tuple(32, 32, &convolve8_msa),
1796     make_tuple(64, 32, &convolve8_msa),
1797     make_tuple(32, 64, &convolve8_msa),
1798     make_tuple(64, 64, &convolve8_msa)));
1799 #endif  // HAVE_MSA
1800 }  // namespace