granicus.if.org Git - libvpx/blob - test/partial_idct_test.cc

   1 /*
   2  *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
   3  *
   4  *  Use of this source code is governed by a BSD-style license
   5  *  that can be found in the LICENSE file in the root of the source
   6  *  tree. An additional intellectual property rights grant can be found
   7  *  in the file PATENTS.  All contributing project authors may
   8  *  be found in the AUTHORS file in the root of the source tree.
   9  */
  10
  11 #include <math.h>
  12 #include <stdlib.h>
  13 #include <string.h>
  14
  15 #include <limits>
  16
  17 #include "third_party/googletest/src/include/gtest/gtest.h"
  18
  19 #include "./vp9_rtcd.h"
  20 #include "./vpx_dsp_rtcd.h"
  21 #include "test/acm_random.h"
  22 #include "test/clear_system_state.h"
  23 #include "test/register_state_check.h"
  24 #include "test/util.h"
  25 #include "vp9/common/vp9_blockd.h"
  26 #include "vp9/common/vp9_scan.h"
  27 #include "vpx/vpx_integer.h"
  28 #include "vpx_ports/vpx_timer.h"
  29
  30 using libvpx_test::ACMRandom;
  31
  32 namespace {
  33
  34 typedef void (*FwdTxfmFunc)(const int16_t *in, tran_low_t *out, int stride);
  35 typedef void (*InvTxfmFunc)(const tran_low_t *in, uint8_t *out, int stride);
  36 typedef void (*InvTxfmWithBdFunc)(const tran_low_t *in, uint8_t *out,
  37                                   int stride, int bd);
  38
  39 template <InvTxfmFunc fn>
  40 void wrapper(const tran_low_t *in, uint8_t *out, int stride, int bd) {
  41   (void)bd;
  42   fn(in, out, stride);
  43 }
  44
  45 #if CONFIG_VP9_HIGHBITDEPTH
  46 template <InvTxfmWithBdFunc fn>
  47 void highbd_wrapper(const tran_low_t *in, uint8_t *out, int stride, int bd) {
  48   fn(in, CONVERT_TO_BYTEPTR(out), stride, bd);
  49 }
  50 #endif
  51
  52 typedef std::tr1::tuple<FwdTxfmFunc, InvTxfmWithBdFunc, InvTxfmWithBdFunc,
  53                         TX_SIZE, int, int, int>
  54     PartialInvTxfmParam;
  55 const int kMaxNumCoeffs = 1024;
  56 const int kCountTestBlock = 1000;
  57
  58 // https://bugs.chromium.org/p/webm/issues/detail?id=1332
  59 // The functions specified do not pass with INT16_MIN/MAX. They fail at the
  60 // value specified, but pass when 1 is added/subtracted.
  61 int16_t MaxSupportedCoeff(InvTxfmWithBdFunc a) {
  62 #if HAVE_SSSE3 && ARCH_X86_64 && !CONFIG_EMULATE_HARDWARE
  63   if (a == &wrapper<vpx_idct8x8_64_add_ssse3> ||
  64       a == &wrapper<vpx_idct8x8_12_add_ssse3>) {
  65     return 23625 - 1;
  66   }
  67 #else
  68   (void)a;
  69 #endif
  70   return std::numeric_limits<int16_t>::max();
  71 }
  72
  73 int16_t MinSupportedCoeff(InvTxfmWithBdFunc a) {
  74 #if HAVE_SSSE3 && ARCH_X86_64 && !CONFIG_EMULATE_HARDWARE
  75   if (a == &wrapper<vpx_idct8x8_64_add_ssse3> ||
  76       a == &wrapper<vpx_idct8x8_12_add_ssse3>) {
  77     return -23625 + 1;
  78   }
  79 #else
  80   (void)a;
  81 #endif
  82   return std::numeric_limits<int16_t>::min();
  83 }
  84
  85 class PartialIDctTest : public ::testing::TestWithParam<PartialInvTxfmParam> {
  86  public:
  87   virtual ~PartialIDctTest() {}
  88   virtual void SetUp() {
  89     rnd_.Reset(ACMRandom::DeterministicSeed());
  90     ftxfm_ = GET_PARAM(0);
  91     full_itxfm_ = GET_PARAM(1);
  92     partial_itxfm_ = GET_PARAM(2);
  93     tx_size_ = GET_PARAM(3);
  94     last_nonzero_ = GET_PARAM(4);
  95     bit_depth_ = GET_PARAM(5);
  96     pixel_size_ = GET_PARAM(6);
  97     mask_ = (1 << bit_depth_) - 1;
  98
  99     switch (tx_size_) {
 100       case TX_4X4: size_ = 4; break;
 101       case TX_8X8: size_ = 8; break;
 102       case TX_16X16: size_ = 16; break;
 103       case TX_32X32: size_ = 32; break;
 104       default: FAIL() << "Wrong Size!"; break;
 105     }
 106
 107     // Randomize stride_ to a value less than or equal to 1024
 108     stride_ = rnd_(1024) + 1;
 109     if (stride_ < size_) {
 110       stride_ = size_;
 111     }
 112     // Align stride_ to 16 if it's bigger than 16.
 113     if (stride_ > 16) {
 114       stride_ &= ~15;
 115     }
 116
 117     input_block_size_ = size_ * size_;
 118     output_block_size_ = size_ * stride_;
 119
 120     input_block_ = reinterpret_cast<tran_low_t *>(
 121         vpx_memalign(16, sizeof(*input_block_) * input_block_size_));
 122     output_block_ = reinterpret_cast<uint8_t *>(
 123         vpx_memalign(16, pixel_size_ * output_block_size_));
 124     output_block_ref_ = reinterpret_cast<uint8_t *>(
 125         vpx_memalign(16, pixel_size_ * output_block_size_));
 126   }
 127
 128   virtual void TearDown() {
 129     vpx_free(input_block_);
 130     input_block_ = NULL;
 131     vpx_free(output_block_);
 132     output_block_ = NULL;
 133     vpx_free(output_block_ref_);
 134     output_block_ref_ = NULL;
 135     libvpx_test::ClearSystemState();
 136   }
 137
 138   void InitMem() {
 139     memset(input_block_, 0, sizeof(*input_block_) * input_block_size_);
 140     if (pixel_size_ == 1) {
 141       for (int j = 0; j < output_block_size_; ++j) {
 142         output_block_[j] = output_block_ref_[j] = rnd_.Rand16() & mask_;
 143       }
 144     } else {
 145       ASSERT_EQ(2, pixel_size_);
 146       uint16_t *const output = reinterpret_cast<uint16_t *>(output_block_);
 147       uint16_t *const output_ref =
 148           reinterpret_cast<uint16_t *>(output_block_ref_);
 149       for (int j = 0; j < output_block_size_; ++j) {
 150         output[j] = output_ref[j] = rnd_.Rand16() & mask_;
 151       }
 152     }
 153   }
 154
 155   void InitInput() {
 156     const int max_coeff = 32766 / 4;
 157     int max_energy_leftover = max_coeff * max_coeff;
 158     for (int j = 0; j < last_nonzero_; ++j) {
 159       int16_t coeff = static_cast<int16_t>(sqrt(1.0 * max_energy_leftover) *
 160                                            (rnd_.Rand16() - 32768) / 65536);
 161       max_energy_leftover -= coeff * coeff;
 162       if (max_energy_leftover < 0) {
 163         max_energy_leftover = 0;
 164         coeff = 0;
 165       }
 166       input_block_[vp9_default_scan_orders[tx_size_].scan[j]] = coeff;
 167     }
 168   }
 169
 170  protected:
 171   int last_nonzero_;
 172   TX_SIZE tx_size_;
 173   tran_low_t *input_block_;
 174   uint8_t *output_block_;
 175   uint8_t *output_block_ref_;
 176   int size_;
 177   int stride_;
 178   int pixel_size_;
 179   int input_block_size_;
 180   int output_block_size_;
 181   int bit_depth_;
 182   int mask_;
 183   FwdTxfmFunc ftxfm_;
 184   InvTxfmWithBdFunc full_itxfm_;
 185   InvTxfmWithBdFunc partial_itxfm_;
 186   ACMRandom rnd_;
 187 };
 188
 189 TEST_P(PartialIDctTest, RunQuantCheck) {
 190   DECLARE_ALIGNED(16, int16_t, input_extreme_block[kMaxNumCoeffs]);
 191   DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kMaxNumCoeffs]);
 192
 193   InitMem();
 194   for (int i = 0; i < kCountTestBlock * kCountTestBlock; ++i) {
 195     // Initialize a test block with input range [-mask_, mask_].
 196     if (i == 0) {
 197       for (int k = 0; k < input_block_size_; ++k) {
 198         input_extreme_block[k] = mask_;
 199       }
 200     } else if (i == 1) {
 201       for (int k = 0; k < input_block_size_; ++k) {
 202         input_extreme_block[k] = -mask_;
 203       }
 204     } else {
 205       for (int k = 0; k < input_block_size_; ++k) {
 206         input_extreme_block[k] = rnd_.Rand8() % 2 ? mask_ : -mask_;
 207       }
 208     }
 209
 210     ftxfm_(input_extreme_block, output_ref_block, size_);
 211
 212     // quantization with minimum allowed step sizes
 213     input_block_[0] = (output_ref_block[0] / 4) * 4;
 214     for (int k = 1; k < last_nonzero_; ++k) {
 215       const int pos = vp9_default_scan_orders[tx_size_].scan[k];
 216       input_block_[pos] = (output_ref_block[pos] / 4) * 4;
 217     }
 218
 219     ASM_REGISTER_STATE_CHECK(
 220         full_itxfm_(input_block_, output_block_ref_, stride_, bit_depth_));
 221     ASM_REGISTER_STATE_CHECK(
 222         partial_itxfm_(input_block_, output_block_, stride_, bit_depth_));
 223     ASSERT_EQ(0, memcmp(output_block_ref_, output_block_,
 224                         pixel_size_ * output_block_size_))
 225         << "Error: partial inverse transform produces different results";
 226   }
 227 }
 228
 229 TEST_P(PartialIDctTest, ResultsMatch) {
 230   for (int i = 0; i < kCountTestBlock; ++i) {
 231     InitMem();
 232     InitInput();
 233
 234     ASM_REGISTER_STATE_CHECK(
 235         full_itxfm_(input_block_, output_block_ref_, stride_, bit_depth_));
 236     ASM_REGISTER_STATE_CHECK(
 237         partial_itxfm_(input_block_, output_block_, stride_, bit_depth_));
 238     ASSERT_EQ(0, memcmp(output_block_ref_, output_block_,
 239                         pixel_size_ * output_block_size_))
 240         << "Error: partial inverse transform produces different results";
 241   }
 242 }
 243
 244 TEST_P(PartialIDctTest, AddOutputBlock) {
 245   for (int i = 0; i < kCountTestBlock; ++i) {
 246     InitMem();
 247     for (int j = 0; j < last_nonzero_; ++j) {
 248       input_block_[vp9_default_scan_orders[tx_size_].scan[j]] = 10;
 249     }
 250
 251     ASM_REGISTER_STATE_CHECK(
 252         full_itxfm_(input_block_, output_block_ref_, stride_, bit_depth_));
 253     ASM_REGISTER_STATE_CHECK(
 254         partial_itxfm_(input_block_, output_block_, stride_, bit_depth_));
 255     ASSERT_EQ(0, memcmp(output_block_ref_, output_block_,
 256                         pixel_size_ * output_block_size_))
 257         << "Error: Transform results are not correctly added to output.";
 258   }
 259 }
 260
 261 TEST_P(PartialIDctTest, SingleExtremeCoeff) {
 262   const int16_t max_coeff = MaxSupportedCoeff(partial_itxfm_);
 263   const int16_t min_coeff = MinSupportedCoeff(partial_itxfm_);
 264   for (int i = 0; i < last_nonzero_; ++i) {
 265     memset(input_block_, 0, sizeof(*input_block_) * input_block_size_);
 266     // Run once for min and once for max.
 267     for (int j = 0; j < 2; ++j) {
 268       const int coeff = j ? min_coeff : max_coeff;
 269
 270       memset(output_block_, 0, pixel_size_ * output_block_size_);
 271       memset(output_block_ref_, 0, pixel_size_ * output_block_size_);
 272       input_block_[vp9_default_scan_orders[tx_size_].scan[i]] = coeff;
 273
 274       ASM_REGISTER_STATE_CHECK(
 275           full_itxfm_(input_block_, output_block_ref_, stride_, bit_depth_));
 276       ASM_REGISTER_STATE_CHECK(
 277           partial_itxfm_(input_block_, output_block_, stride_, bit_depth_));
 278       ASSERT_EQ(0, memcmp(output_block_ref_, output_block_,
 279                           pixel_size_ * output_block_size_))
 280           << "Error: Fails with single coeff of " << coeff << " at " << i
 281           << ".";
 282     }
 283   }
 284 }
 285
 286 TEST_P(PartialIDctTest, DISABLED_Speed) {
 287   // Keep runtime stable with transform size.
 288   const int kCountSpeedTestBlock = 500000000 / input_block_size_;
 289   InitMem();
 290   InitInput();
 291
 292   for (int i = 0; i < kCountSpeedTestBlock; ++i) {
 293     ASM_REGISTER_STATE_CHECK(
 294         full_itxfm_(input_block_, output_block_ref_, stride_, bit_depth_));
 295   }
 296   vpx_usec_timer timer;
 297   vpx_usec_timer_start(&timer);
 298   for (int i = 0; i < kCountSpeedTestBlock; ++i) {
 299     partial_itxfm_(input_block_, output_block_, stride_, bit_depth_);
 300   }
 301   libvpx_test::ClearSystemState();
 302   vpx_usec_timer_mark(&timer);
 303   const int elapsed_time =
 304       static_cast<int>(vpx_usec_timer_elapsed(&timer) / 1000);
 305   printf("idct%dx%d_%d (bitdepth %d) time: %5d ms ", size_, size_,
 306          last_nonzero_, bit_depth_, elapsed_time);
 307
 308   ASSERT_EQ(0, memcmp(output_block_ref_, output_block_,
 309                       pixel_size_ * output_block_size_))
 310       << "Error: partial inverse transform produces different results";
 311 }
 312
 313 using std::tr1::make_tuple;
 314
 315 const PartialInvTxfmParam c_partial_idct_tests[] = {
 316 #if CONFIG_VP9_HIGHBITDEPTH
 317   make_tuple(
 318       &vpx_highbd_fdct32x32_c, &highbd_wrapper<vpx_highbd_idct32x32_1024_add_c>,
 319       &highbd_wrapper<vpx_highbd_idct32x32_1024_add_c>, TX_32X32, 1024, 8, 2),
 320   make_tuple(
 321       &vpx_highbd_fdct32x32_c, &highbd_wrapper<vpx_highbd_idct32x32_1024_add_c>,
 322       &highbd_wrapper<vpx_highbd_idct32x32_1024_add_c>, TX_32X32, 1024, 10, 2),
 323   make_tuple(
 324       &vpx_highbd_fdct32x32_c, &highbd_wrapper<vpx_highbd_idct32x32_1024_add_c>,
 325       &highbd_wrapper<vpx_highbd_idct32x32_1024_add_c>, TX_32X32, 1024, 12, 2),
 326   make_tuple(
 327       &vpx_highbd_fdct32x32_c, &highbd_wrapper<vpx_highbd_idct32x32_1024_add_c>,
 328       &highbd_wrapper<vpx_highbd_idct32x32_34_add_c>, TX_32X32, 34, 8, 2),
 329   make_tuple(
 330       &vpx_highbd_fdct32x32_c, &highbd_wrapper<vpx_highbd_idct32x32_1024_add_c>,
 331       &highbd_wrapper<vpx_highbd_idct32x32_34_add_c>, TX_32X32, 34, 10, 2),
 332   make_tuple(
 333       &vpx_highbd_fdct32x32_c, &highbd_wrapper<vpx_highbd_idct32x32_1024_add_c>,
 334       &highbd_wrapper<vpx_highbd_idct32x32_34_add_c>, TX_32X32, 34, 12, 2),
 335   make_tuple(&vpx_highbd_fdct32x32_c,
 336              &highbd_wrapper<vpx_highbd_idct32x32_1024_add_c>,
 337              &highbd_wrapper<vpx_highbd_idct32x32_1_add_c>, TX_32X32, 1, 8, 2),
 338   make_tuple(&vpx_highbd_fdct32x32_c,
 339              &highbd_wrapper<vpx_highbd_idct32x32_1024_add_c>,
 340              &highbd_wrapper<vpx_highbd_idct32x32_1_add_c>, TX_32X32, 1, 10, 2),
 341   make_tuple(&vpx_highbd_fdct32x32_c,
 342              &highbd_wrapper<vpx_highbd_idct32x32_1024_add_c>,
 343              &highbd_wrapper<vpx_highbd_idct32x32_1_add_c>, TX_32X32, 1, 12, 2),
 344   make_tuple(
 345       &vpx_highbd_fdct16x16_c, &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>,
 346       &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>, TX_16X16, 256, 8, 2),
 347   make_tuple(
 348       &vpx_highbd_fdct16x16_c, &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>,
 349       &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>, TX_16X16, 256, 10, 2),
 350   make_tuple(
 351       &vpx_highbd_fdct16x16_c, &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>,
 352       &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>, TX_16X16, 256, 12, 2),
 353   make_tuple(
 354       &vpx_highbd_fdct16x16_c, &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>,
 355       &highbd_wrapper<vpx_highbd_idct16x16_10_add_c>, TX_16X16, 10, 8, 2),
 356   make_tuple(
 357       &vpx_highbd_fdct16x16_c, &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>,
 358       &highbd_wrapper<vpx_highbd_idct16x16_10_add_c>, TX_16X16, 10, 10, 2),
 359   make_tuple(
 360       &vpx_highbd_fdct16x16_c, &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>,
 361       &highbd_wrapper<vpx_highbd_idct16x16_10_add_c>, TX_16X16, 10, 12, 2),
 362   make_tuple(&vpx_highbd_fdct16x16_c,
 363              &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>,
 364              &highbd_wrapper<vpx_highbd_idct16x16_1_add_c>, TX_16X16, 1, 8, 2),
 365   make_tuple(&vpx_highbd_fdct16x16_c,
 366              &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>,
 367              &highbd_wrapper<vpx_highbd_idct16x16_1_add_c>, TX_16X16, 1, 10, 2),
 368   make_tuple(&vpx_highbd_fdct16x16_c,
 369              &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>,
 370              &highbd_wrapper<vpx_highbd_idct16x16_1_add_c>, TX_16X16, 1, 12, 2),
 371   make_tuple(&vpx_highbd_fdct8x8_c,
 372              &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>,
 373              &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>, TX_8X8, 64, 8, 2),
 374   make_tuple(&vpx_highbd_fdct8x8_c,
 375              &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>,
 376              &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>, TX_8X8, 64, 10, 2),
 377   make_tuple(&vpx_highbd_fdct8x8_c,
 378              &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>,
 379              &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>, TX_8X8, 64, 12, 2),
 380   make_tuple(&vpx_highbd_fdct8x8_c,
 381              &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>,
 382              &highbd_wrapper<vpx_highbd_idct8x8_12_add_c>, TX_8X8, 12, 8, 2),
 383   make_tuple(&vpx_highbd_fdct8x8_c,
 384              &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>,
 385              &highbd_wrapper<vpx_highbd_idct8x8_12_add_c>, TX_8X8, 12, 10, 2),
 386   make_tuple(&vpx_highbd_fdct8x8_c,
 387              &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>,
 388              &highbd_wrapper<vpx_highbd_idct8x8_12_add_c>, TX_8X8, 12, 12, 2),
 389   make_tuple(&vpx_highbd_fdct8x8_c,
 390              &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>,
 391              &highbd_wrapper<vpx_highbd_idct8x8_1_add_c>, TX_8X8, 1, 8, 2),
 392   make_tuple(&vpx_highbd_fdct8x8_c,
 393              &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>,
 394              &highbd_wrapper<vpx_highbd_idct8x8_1_add_c>, TX_8X8, 1, 10, 2),
 395   make_tuple(&vpx_highbd_fdct8x8_c,
 396              &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>,
 397              &highbd_wrapper<vpx_highbd_idct8x8_1_add_c>, TX_8X8, 1, 12, 2),
 398   make_tuple(&vpx_highbd_fdct4x4_c,
 399              &highbd_wrapper<vpx_highbd_idct4x4_16_add_c>,
 400              &highbd_wrapper<vpx_highbd_idct4x4_16_add_c>, TX_4X4, 16, 8, 2),
 401   make_tuple(&vpx_highbd_fdct4x4_c,
 402              &highbd_wrapper<vpx_highbd_idct4x4_16_add_c>,
 403              &highbd_wrapper<vpx_highbd_idct4x4_16_add_c>, TX_4X4, 16, 10, 2),
 404   make_tuple(&vpx_highbd_fdct4x4_c,
 405              &highbd_wrapper<vpx_highbd_idct4x4_16_add_c>,
 406              &highbd_wrapper<vpx_highbd_idct4x4_16_add_c>, TX_4X4, 16, 12, 2),
 407   make_tuple(&vpx_highbd_fdct4x4_c,
 408              &highbd_wrapper<vpx_highbd_idct4x4_16_add_c>,
 409              &highbd_wrapper<vpx_highbd_idct4x4_1_add_c>, TX_4X4, 1, 8, 2),
 410   make_tuple(&vpx_highbd_fdct4x4_c,
 411              &highbd_wrapper<vpx_highbd_idct4x4_16_add_c>,
 412              &highbd_wrapper<vpx_highbd_idct4x4_1_add_c>, TX_4X4, 1, 10, 2),
 413   make_tuple(&vpx_highbd_fdct4x4_c,
 414              &highbd_wrapper<vpx_highbd_idct4x4_16_add_c>,
 415              &highbd_wrapper<vpx_highbd_idct4x4_1_add_c>, TX_4X4, 1, 12, 2),
 416 #endif  // CONFIG_VP9_HIGHBITDEPTH
 417   make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
 418              &wrapper<vpx_idct32x32_1024_add_c>, TX_32X32, 1024, 8, 1),
 419   make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
 420              &wrapper<vpx_idct32x32_135_add_c>, TX_32X32, 135, 8, 1),
 421   make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
 422              &wrapper<vpx_idct32x32_34_add_c>, TX_32X32, 34, 8, 1),
 423   make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
 424              &wrapper<vpx_idct32x32_1_add_c>, TX_32X32, 1, 8, 1),
 425   make_tuple(&vpx_fdct16x16_c, &wrapper<vpx_idct16x16_256_add_c>,
 426              &wrapper<vpx_idct16x16_256_add_c>, TX_16X16, 256, 8, 1),
 427   make_tuple(&vpx_fdct16x16_c, &wrapper<vpx_idct16x16_256_add_c>,
 428              &wrapper<vpx_idct16x16_10_add_c>, TX_16X16, 10, 8, 1),
 429   make_tuple(&vpx_fdct16x16_c, &wrapper<vpx_idct16x16_256_add_c>,
 430              &wrapper<vpx_idct16x16_1_add_c>, TX_16X16, 1, 8, 1),
 431   make_tuple(&vpx_fdct8x8_c, &wrapper<vpx_idct8x8_64_add_c>,
 432              &wrapper<vpx_idct8x8_64_add_c>, TX_8X8, 64, 8, 1),
 433   make_tuple(&vpx_fdct8x8_c, &wrapper<vpx_idct8x8_64_add_c>,
 434              &wrapper<vpx_idct8x8_12_add_c>, TX_8X8, 12, 8, 1),
 435   make_tuple(&vpx_fdct8x8_c, &wrapper<vpx_idct8x8_64_add_c>,
 436              &wrapper<vpx_idct8x8_1_add_c>, TX_8X8, 1, 8, 1),
 437   make_tuple(&vpx_fdct4x4_c, &wrapper<vpx_idct4x4_16_add_c>,
 438              &wrapper<vpx_idct4x4_16_add_c>, TX_4X4, 16, 8, 1),
 439   make_tuple(&vpx_fdct4x4_c, &wrapper<vpx_idct4x4_16_add_c>,
 440              &wrapper<vpx_idct4x4_1_add_c>, TX_4X4, 1, 8, 1)
 441 };
 442
 443 INSTANTIATE_TEST_CASE_P(C, PartialIDctTest,
 444                         ::testing::ValuesIn(c_partial_idct_tests));
 445
 446 #if HAVE_NEON && !CONFIG_EMULATE_HARDWARE
 447 const PartialInvTxfmParam neon_partial_idct_tests[] = {
 448 #if CONFIG_VP9_HIGHBITDEPTH
 449   make_tuple(&vpx_highbd_fdct4x4_c,
 450              &highbd_wrapper<vpx_highbd_idct4x4_16_add_c>,
 451              &highbd_wrapper<vpx_highbd_idct4x4_16_add_neon>, TX_4X4, 1, 8, 2),
 452   make_tuple(&vpx_highbd_fdct4x4_c,
 453              &highbd_wrapper<vpx_highbd_idct4x4_16_add_c>,
 454              &highbd_wrapper<vpx_highbd_idct4x4_16_add_neon>, TX_4X4, 1, 10, 2),
 455   make_tuple(&vpx_highbd_fdct4x4_c,
 456              &highbd_wrapper<vpx_highbd_idct4x4_16_add_c>,
 457              &highbd_wrapper<vpx_highbd_idct4x4_16_add_neon>, TX_4X4, 1, 12, 2),
 458   make_tuple(&vpx_highbd_fdct4x4_c, &highbd_wrapper<vpx_highbd_idct4x4_1_add_c>,
 459              &highbd_wrapper<vpx_highbd_idct4x4_1_add_neon>, TX_4X4, 1, 8, 2),
 460   make_tuple(&vpx_highbd_fdct4x4_c, &highbd_wrapper<vpx_highbd_idct4x4_1_add_c>,
 461              &highbd_wrapper<vpx_highbd_idct4x4_1_add_neon>, TX_4X4, 1, 10, 2),
 462   make_tuple(&vpx_highbd_fdct4x4_c, &highbd_wrapper<vpx_highbd_idct4x4_1_add_c>,
 463              &highbd_wrapper<vpx_highbd_idct4x4_1_add_neon>, TX_4X4, 1, 12, 2),
 464 #endif  // CONFIG_VP9_HIGHBITDEPTH
 465   make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
 466              &wrapper<vpx_idct32x32_1024_add_neon>, TX_32X32, 1024, 8, 1),
 467   make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
 468              &wrapper<vpx_idct32x32_135_add_neon>, TX_32X32, 135, 8, 1),
 469   make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
 470              &wrapper<vpx_idct32x32_34_add_neon>, TX_32X32, 34, 8, 1),
 471   make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
 472              &wrapper<vpx_idct32x32_1_add_neon>, TX_32X32, 1, 8, 1),
 473   make_tuple(&vpx_fdct16x16_c, &wrapper<vpx_idct16x16_256_add_c>,
 474              &wrapper<vpx_idct16x16_256_add_neon>, TX_16X16, 256, 8, 1),
 475   make_tuple(&vpx_fdct16x16_c, &wrapper<vpx_idct16x16_256_add_c>,
 476              &wrapper<vpx_idct16x16_10_add_neon>, TX_16X16, 10, 8, 1),
 477   make_tuple(&vpx_fdct16x16_c, &wrapper<vpx_idct16x16_256_add_c>,
 478              &wrapper<vpx_idct16x16_1_add_neon>, TX_16X16, 1, 8, 1),
 479   make_tuple(&vpx_fdct8x8_c, &wrapper<vpx_idct8x8_64_add_c>,
 480              &wrapper<vpx_idct8x8_64_add_neon>, TX_8X8, 64, 8, 1),
 481   make_tuple(&vpx_fdct8x8_c, &wrapper<vpx_idct8x8_64_add_c>,
 482              &wrapper<vpx_idct8x8_12_add_neon>, TX_8X8, 12, 8, 1),
 483   make_tuple(&vpx_fdct8x8_c, &wrapper<vpx_idct8x8_64_add_c>,
 484              &wrapper<vpx_idct8x8_1_add_neon>, TX_8X8, 1, 8, 1),
 485   make_tuple(&vpx_fdct4x4_c, &wrapper<vpx_idct4x4_16_add_c>,
 486              &wrapper<vpx_idct4x4_16_add_neon>, TX_4X4, 16, 8, 1),
 487   make_tuple(&vpx_fdct4x4_c, &wrapper<vpx_idct4x4_16_add_c>,
 488              &wrapper<vpx_idct4x4_1_add_neon>, TX_4X4, 1, 8, 1)
 489 };
 490
 491 INSTANTIATE_TEST_CASE_P(NEON, PartialIDctTest,
 492                         ::testing::ValuesIn(neon_partial_idct_tests));
 493 #endif  // HAVE_NEON && !CONFIG_EMULATE_HARDWARE
 494
 495 #if HAVE_SSE2 && !CONFIG_EMULATE_HARDWARE
 496 // 32x32_135_ is implemented using the 1024 version.
 497 const PartialInvTxfmParam sse2_partial_idct_tests[] = {
 498 #if CONFIG_VP9_HIGHBITDEPTH
 499   make_tuple(
 500       &vpx_highbd_fdct32x32_c, &highbd_wrapper<vpx_highbd_idct32x32_1024_add_c>,
 501       &highbd_wrapper<vpx_highbd_idct32x32_1_add_sse2>, TX_32X32, 1, 8, 2),
 502   make_tuple(
 503       &vpx_highbd_fdct32x32_c, &highbd_wrapper<vpx_highbd_idct32x32_1024_add_c>,
 504       &highbd_wrapper<vpx_highbd_idct32x32_1_add_sse2>, TX_32X32, 1, 10, 2),
 505   make_tuple(
 506       &vpx_highbd_fdct32x32_c, &highbd_wrapper<vpx_highbd_idct32x32_1024_add_c>,
 507       &highbd_wrapper<vpx_highbd_idct32x32_1_add_sse2>, TX_32X32, 1, 12, 2),
 508   make_tuple(
 509       &vpx_highbd_fdct16x16_c, &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>,
 510       &highbd_wrapper<vpx_highbd_idct16x16_256_add_sse2>, TX_16X16, 256, 8, 2),
 511   make_tuple(
 512       &vpx_highbd_fdct16x16_c, &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>,
 513       &highbd_wrapper<vpx_highbd_idct16x16_256_add_sse2>, TX_16X16, 256, 10, 2),
 514   make_tuple(
 515       &vpx_highbd_fdct16x16_c, &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>,
 516       &highbd_wrapper<vpx_highbd_idct16x16_256_add_sse2>, TX_16X16, 256, 12, 2),
 517   make_tuple(
 518       &vpx_highbd_fdct16x16_c, &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>,
 519       &highbd_wrapper<vpx_highbd_idct16x16_10_add_sse2>, TX_16X16, 10, 8, 2),
 520   make_tuple(
 521       &vpx_highbd_fdct16x16_c, &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>,
 522       &highbd_wrapper<vpx_highbd_idct16x16_10_add_sse2>, TX_16X16, 10, 10, 2),
 523   make_tuple(
 524       &vpx_highbd_fdct16x16_c, &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>,
 525       &highbd_wrapper<vpx_highbd_idct16x16_10_add_sse2>, TX_16X16, 10, 12, 2),
 526   make_tuple(&vpx_highbd_fdct8x8_c,
 527              &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>,
 528              &highbd_wrapper<vpx_highbd_idct8x8_64_add_sse2>, TX_8X8, 64, 8, 2),
 529   make_tuple(
 530       &vpx_highbd_fdct8x8_c, &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>,
 531       &highbd_wrapper<vpx_highbd_idct8x8_64_add_sse2>, TX_8X8, 64, 10, 2),
 532   make_tuple(
 533       &vpx_highbd_fdct8x8_c, &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>,
 534       &highbd_wrapper<vpx_highbd_idct8x8_64_add_sse2>, TX_8X8, 64, 12, 2),
 535   make_tuple(&vpx_highbd_fdct8x8_c,
 536              &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>,
 537              &highbd_wrapper<vpx_highbd_idct8x8_12_add_sse2>, TX_8X8, 12, 8, 2),
 538   make_tuple(
 539       &vpx_highbd_fdct8x8_c, &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>,
 540       &highbd_wrapper<vpx_highbd_idct8x8_12_add_sse2>, TX_8X8, 12, 10, 2),
 541   make_tuple(
 542       &vpx_highbd_fdct8x8_c, &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>,
 543       &highbd_wrapper<vpx_highbd_idct8x8_12_add_sse2>, TX_8X8, 12, 12, 2),
 544   make_tuple(&vpx_highbd_fdct4x4_c,
 545              &highbd_wrapper<vpx_highbd_idct4x4_16_add_c>,
 546              &highbd_wrapper<vpx_highbd_idct4x4_16_add_sse2>, TX_4X4, 1, 8, 2),
 547   make_tuple(&vpx_highbd_fdct4x4_c,
 548              &highbd_wrapper<vpx_highbd_idct4x4_16_add_c>,
 549              &highbd_wrapper<vpx_highbd_idct4x4_16_add_sse2>, TX_4X4, 1, 10, 2),
 550   make_tuple(&vpx_highbd_fdct4x4_c,
 551              &highbd_wrapper<vpx_highbd_idct4x4_16_add_c>,
 552              &highbd_wrapper<vpx_highbd_idct4x4_16_add_sse2>, TX_4X4, 1, 12, 2),
 553 #endif  // CONFIG_VP9_HIGHBITDEPTH
 554   make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
 555              &wrapper<vpx_idct32x32_1024_add_sse2>, TX_32X32, 1024, 8, 1),
 556   make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
 557              &wrapper<vpx_idct32x32_1024_add_sse2>, TX_32X32, 135, 8, 1),
 558   make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
 559              &wrapper<vpx_idct32x32_34_add_sse2>, TX_32X32, 34, 8, 1),
 560   make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
 561              &wrapper<vpx_idct32x32_1_add_sse2>, TX_32X32, 1, 8, 1),
 562   make_tuple(&vpx_fdct16x16_c, &wrapper<vpx_idct16x16_256_add_c>,
 563              &wrapper<vpx_idct16x16_256_add_sse2>, TX_16X16, 256, 8, 1),
 564   make_tuple(&vpx_fdct16x16_c, &wrapper<vpx_idct16x16_256_add_c>,
 565              &wrapper<vpx_idct16x16_10_add_sse2>, TX_16X16, 10, 8, 1),
 566   make_tuple(&vpx_fdct16x16_c, &wrapper<vpx_idct16x16_256_add_c>,
 567              &wrapper<vpx_idct16x16_1_add_sse2>, TX_16X16, 1, 8, 1),
 568   make_tuple(&vpx_fdct8x8_c, &wrapper<vpx_idct8x8_64_add_c>,
 569              &wrapper<vpx_idct8x8_64_add_sse2>, TX_8X8, 64, 8, 1),
 570   make_tuple(&vpx_fdct8x8_c, &wrapper<vpx_idct8x8_64_add_c>,
 571              &wrapper<vpx_idct8x8_12_add_sse2>, TX_8X8, 12, 8, 1),
 572   make_tuple(&vpx_fdct8x8_c, &wrapper<vpx_idct8x8_64_add_c>,
 573              &wrapper<vpx_idct8x8_1_add_sse2>, TX_8X8, 1, 8, 1),
 574   make_tuple(&vpx_fdct4x4_c, &wrapper<vpx_idct4x4_16_add_c>,
 575              &wrapper<vpx_idct4x4_16_add_sse2>, TX_4X4, 16, 8, 1),
 576   make_tuple(&vpx_fdct4x4_c, &wrapper<vpx_idct4x4_16_add_c>,
 577              &wrapper<vpx_idct4x4_1_add_sse2>, TX_4X4, 1, 8, 1)
 578 };
 579
 580 INSTANTIATE_TEST_CASE_P(SSE2, PartialIDctTest,
 581                         ::testing::ValuesIn(sse2_partial_idct_tests));
 582
 583 #endif  // HAVE_SSE2 && !CONFIG_EMULATE_HARDWARE
 584
 585 #if HAVE_SSSE3 && ARCH_X86_64 && !CONFIG_EMULATE_HARDWARE
 586 const PartialInvTxfmParam ssse3_partial_idct_tests[] = {
 587   make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
 588              &wrapper<vpx_idct32x32_1024_add_ssse3>, TX_32X32, 1024, 8, 1),
 589   make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
 590              &wrapper<vpx_idct32x32_135_add_ssse3>, TX_32X32, 135, 8, 1),
 591   make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
 592              &wrapper<vpx_idct32x32_34_add_ssse3>, TX_32X32, 34, 8, 1),
 593   make_tuple(&vpx_fdct8x8_c, &wrapper<vpx_idct8x8_64_add_c>,
 594              &wrapper<vpx_idct8x8_64_add_ssse3>, TX_8X8, 64, 8, 1),
 595   make_tuple(&vpx_fdct8x8_c, &wrapper<vpx_idct8x8_64_add_c>,
 596              &wrapper<vpx_idct8x8_12_add_ssse3>, TX_8X8, 12, 8, 1)
 597 };
 598
 599 INSTANTIATE_TEST_CASE_P(SSSE3, PartialIDctTest,
 600                         ::testing::ValuesIn(ssse3_partial_idct_tests));
 601 #endif  // HAVE_SSSE3 && ARCH_X86_64 && !CONFIG_EMULATE_HARDWARE
 602
 603 #if HAVE_MSA && !CONFIG_EMULATE_HARDWARE && !CONFIG_VP9_HIGHBITDEPTH
 604 // 32x32_135_ is implemented using the 1024 version.
 605 const PartialInvTxfmParam msa_partial_idct_tests[] = {
 606   make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
 607              &wrapper<vpx_idct32x32_1024_add_msa>, TX_32X32, 1024, 8, 1),
 608   make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
 609              &wrapper<vpx_idct32x32_1024_add_msa>, TX_32X32, 135, 8, 1),
 610   make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
 611              &wrapper<vpx_idct32x32_34_add_msa>, TX_32X32, 34, 8, 1),
 612   make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
 613              &wrapper<vpx_idct32x32_1_add_msa>, TX_32X32, 1, 8, 1),
 614   make_tuple(&vpx_fdct16x16_c, &wrapper<vpx_idct16x16_256_add_c>,
 615              &wrapper<vpx_idct16x16_256_add_msa>, TX_16X16, 256, 8, 1),
 616   make_tuple(&vpx_fdct16x16_c, &wrapper<vpx_idct16x16_256_add_c>,
 617              &wrapper<vpx_idct16x16_10_add_msa>, TX_16X16, 10, 8, 1),
 618   make_tuple(&vpx_fdct16x16_c, &wrapper<vpx_idct16x16_256_add_c>,
 619              &wrapper<vpx_idct16x16_1_add_msa>, TX_16X16, 1, 8, 1),
 620   make_tuple(&vpx_fdct8x8_c, &wrapper<vpx_idct8x8_64_add_c>,
 621              &wrapper<vpx_idct8x8_64_add_msa>, TX_8X8, 64, 8, 1),
 622   make_tuple(&vpx_fdct8x8_c, &wrapper<vpx_idct8x8_64_add_c>,
 623              &wrapper<vpx_idct8x8_12_add_msa>, TX_8X8, 10, 8, 1),
 624   make_tuple(&vpx_fdct8x8_c, &wrapper<vpx_idct8x8_64_add_c>,
 625              &wrapper<vpx_idct8x8_1_add_msa>, TX_8X8, 1, 8, 1),
 626   make_tuple(&vpx_fdct4x4_c, &wrapper<vpx_idct4x4_16_add_c>,
 627              &wrapper<vpx_idct4x4_16_add_msa>, TX_4X4, 16, 8, 1),
 628   make_tuple(&vpx_fdct4x4_c, &wrapper<vpx_idct4x4_16_add_c>,
 629              &wrapper<vpx_idct4x4_1_add_msa>, TX_4X4, 1, 8, 1)
 630 };
 631
 632 INSTANTIATE_TEST_CASE_P(MSA, PartialIDctTest,
 633                         ::testing::ValuesIn(msa_partial_idct_tests));
 634 #endif  // HAVE_MSA && !CONFIG_EMULATE_HARDWARE && !CONFIG_VP9_HIGHBITDEPTH
 635
 636 }  // namespace