/*
 *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
17 #include "third_party/googletest/src/include/gtest/gtest.h"
19 #include "./vp9_rtcd.h"
20 #include "./vpx_dsp_rtcd.h"
21 #include "test/acm_random.h"
22 #include "test/clear_system_state.h"
23 #include "test/register_state_check.h"
24 #include "test/util.h"
25 #include "vp9/common/vp9_blockd.h"
26 #include "vp9/common/vp9_scan.h"
27 #include "vpx/vpx_integer.h"
28 #include "vpx_ports/vpx_timer.h"
30 using libvpx_test::ACMRandom;
34 typedef void (*FwdTxfmFunc)(const int16_t *in, tran_low_t *out, int stride);
35 typedef void (*InvTxfmFunc)(const tran_low_t *in, uint8_t *out, int stride);
36 typedef void (*InvTxfmWithBdFunc)(const tran_low_t *in, uint8_t *out,
39 template <InvTxfmFunc fn>
40 void wrapper(const tran_low_t *in, uint8_t *out, int stride, int bd) {
45 #if CONFIG_VP9_HIGHBITDEPTH
46 template <InvTxfmWithBdFunc fn>
47 void highbd_wrapper(const tran_low_t *in, uint8_t *out, int stride, int bd) {
48 fn(in, CONVERT_TO_BYTEPTR(out), stride, bd);
52 typedef std::tr1::tuple<FwdTxfmFunc, InvTxfmWithBdFunc, InvTxfmWithBdFunc,
53 TX_SIZE, int, int, int>
// Number of coefficients in the largest (32x32) transform block.
const int kMaxNumCoeffs = 1024;
// Number of randomized blocks each test case iterates over.
const int kCountTestBlock = 1000;
58 // https://bugs.chromium.org/p/webm/issues/detail?id=1332
59 // The functions specified do not pass with INT16_MIN/MAX. They fail at the
60 // value specified, but pass when 1 is added/subtracted.
// Returns the largest coefficient value the given partial idct implementation
// is known to handle; INT16_MAX for everything but the listed SSSE3 8x8
// functions.
61 int16_t MaxSupportedCoeff(InvTxfmWithBdFunc a) {
62 #if HAVE_SSSE3 && ARCH_X86_64 && !CONFIG_EMULATE_HARDWARE
63 if (a == &wrapper<vpx_idct8x8_64_add_ssse3> ||
64 a == &wrapper<vpx_idct8x8_12_add_ssse3>) {
// NOTE(review): the reduced return value for the SSSE3 special case, its
// closing braces and the #endif appear to have been dropped from this copy
// of the file -- restore from upstream before building.
70 return std::numeric_limits<int16_t>::max();
// Mirror of MaxSupportedCoeff: smallest coefficient value the given partial
// idct implementation is known to handle; INT16_MIN for everything but the
// listed SSSE3 8x8 functions.
73 int16_t MinSupportedCoeff(InvTxfmWithBdFunc a) {
74 #if HAVE_SSSE3 && ARCH_X86_64 && !CONFIG_EMULATE_HARDWARE
75 if (a == &wrapper<vpx_idct8x8_64_add_ssse3> ||
76 a == &wrapper<vpx_idct8x8_12_add_ssse3>) {
// NOTE(review): as above, the special-case return value, closing braces and
// #endif were dropped in this copy -- restore from upstream.
82 return std::numeric_limits<int16_t>::min();
// Value-parameterized fixture comparing a partial (reduced-coefficient)
// inverse transform against the full inverse transform on identical input.
85 class PartialIDctTest : public ::testing::TestWithParam<PartialInvTxfmParam> {
87 virtual ~PartialIDctTest() {}
// Reads the tuple parameters and allocates aligned input/output buffers.
88 virtual void SetUp() {
89 rnd_.Reset(ACMRandom::DeterministicSeed());
90 ftxfm_ = GET_PARAM(0);
91 full_itxfm_ = GET_PARAM(1);
92 partial_itxfm_ = GET_PARAM(2);
93 tx_size_ = GET_PARAM(3);
94 last_nonzero_ = GET_PARAM(4);
95 bit_depth_ = GET_PARAM(5);
96 pixel_size_ = GET_PARAM(6);
97 mask_ = (1 << bit_depth_) - 1;
// NOTE(review): the `switch (tx_size_) {` header and its closing brace were
// dropped from this copy of the file.
100 case TX_4X4: size_ = 4; break;
101 case TX_8X8: size_ = 8; break;
102 case TX_16X16: size_ = 16; break;
103 case TX_32X32: size_ = 32; break;
104 default: FAIL() << "Wrong Size!"; break;
107 // Randomize stride_ to a value less than or equal to 1024
108 stride_ = rnd_(1024) + 1;
// NOTE(review): the body of this clamp (stride_ raised to at least size_)
// and the 16-byte alignment code referenced by the next comment are missing.
109 if (stride_ < size_) {
112 // Align stride_ to 16 if it's bigger than 16.
117 input_block_size_ = size_ * size_;
118 output_block_size_ = size_ * stride_;
// Buffers are 16-byte aligned for the SIMD implementations under test.
120 input_block_ = reinterpret_cast<tran_low_t *>(
121 vpx_memalign(16, sizeof(*input_block_) * input_block_size_));
122 output_block_ = reinterpret_cast<uint8_t *>(
123 vpx_memalign(16, pixel_size_ * output_block_size_));
124 output_block_ref_ = reinterpret_cast<uint8_t *>(
125 vpx_memalign(16, pixel_size_ * output_block_size_));
// Releases the aligned buffers and resets SIMD state between tests.
128 virtual void TearDown() {
129 vpx_free(input_block_);
131 vpx_free(output_block_);
132 output_block_ = NULL;
133 vpx_free(output_block_ref_);
134 output_block_ref_ = NULL;
135 libvpx_test::ClearSystemState();
// NOTE(review): the lines below belong to a memory-init helper (its `void
// InitMem() {`-style signature was dropped): zero the coefficients and seed
// both output buffers with identical random pixels.
139 memset(input_block_, 0, sizeof(*input_block_) * input_block_size_);
140 if (pixel_size_ == 1) {
141 for (int j = 0; j < output_block_size_; ++j) {
142 output_block_[j] = output_block_ref_[j] = rnd_.Rand16() & mask_;
// High bitdepth path: buffers hold uint16_t pixels.
145 ASSERT_EQ(2, pixel_size_);
146 uint16_t *const output = reinterpret_cast<uint16_t *>(output_block_);
147 uint16_t *const output_ref =
148 reinterpret_cast<uint16_t *>(output_block_ref_);
149 for (int j = 0; j < output_block_size_; ++j) {
150 output[j] = output_ref[j] = rnd_.Rand16() & mask_;
// NOTE(review): the lines below belong to an input-init helper (signature
// dropped): fill the first last_nonzero_ scan positions with random
// coefficients whose total energy is bounded so the idct cannot overflow.
156 const int max_coeff = 32766 / 4;
157 int max_energy_leftover = max_coeff * max_coeff;
158 for (int j = 0; j < last_nonzero_; ++j) {
159 int16_t coeff = static_cast<int16_t>(sqrt(1.0 * max_energy_leftover) *
160 (rnd_.Rand16() - 32768) / 65536);
161 max_energy_leftover -= coeff * coeff;
162 if (max_energy_leftover < 0) {
163 max_energy_leftover = 0;
166 input_block_[vp9_default_scan_orders[tx_size_].scan[j]] = coeff;
// Data members (several declarations were dropped from this copy, e.g.
// size_, stride_, rnd_, ftxfm_ -- they are referenced above).
173 tran_low_t *input_block_;
174 uint8_t *output_block_;
175 uint8_t *output_block_ref_;
179 int input_block_size_;
180 int output_block_size_;
184 InvTxfmWithBdFunc full_itxfm_;
185 InvTxfmWithBdFunc partial_itxfm_;
// Forward-transforms extreme (+/-mask_) pixel blocks, coarsely quantizes the
// coefficients, then checks that partial and full inverse transforms agree.
189 TEST_P(PartialIDctTest, RunQuantCheck) {
190 DECLARE_ALIGNED(16, int16_t, input_extreme_block[kMaxNumCoeffs]);
191 DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kMaxNumCoeffs]);
194 for (int i = 0; i < kCountTestBlock; ++i) {
195 // Initialize a test block with input range [-mask_, mask_].
// NOTE(review): the branch headers selecting all-max / all-min / random
// sign per iteration were dropped from this copy of the file.
197 for (int k = 0; k < input_block_size_; ++k) {
198 input_extreme_block[k] = mask_;
201 for (int k = 0; k < input_block_size_; ++k) {
202 input_extreme_block[k] = -mask_;
205 for (int k = 0; k < input_block_size_; ++k) {
206 input_extreme_block[k] = rnd_.Rand8() % 2 ? mask_ : -mask_;
210 ftxfm_(input_extreme_block, output_ref_block, size_);
212 // quantization with minimum allowed step sizes
213 input_block_[0] = (output_ref_block[0] / 4) * 4;
214 for (int k = 1; k < last_nonzero_; ++k) {
215 const int pos = vp9_default_scan_orders[tx_size_].scan[k];
216 input_block_[pos] = (output_ref_block[pos] / 4) * 4;
219 ASM_REGISTER_STATE_CHECK(
220 full_itxfm_(input_block_, output_block_ref_, stride_, bit_depth_));
221 ASM_REGISTER_STATE_CHECK(
222 partial_itxfm_(input_block_, output_block_, stride_, bit_depth_));
223 ASSERT_EQ(0, memcmp(output_block_ref_, output_block_,
224 pixel_size_ * output_block_size_))
225 << "Error: partial inverse transform produces different results";
// Runs both inverse transforms on randomized energy-bounded input (set up by
// the fixture helpers) and requires byte-identical output.
229 TEST_P(PartialIDctTest, ResultsMatch) {
230 for (int i = 0; i < kCountTestBlock; ++i) {
// NOTE(review): the calls initializing memory/input each iteration appear
// to have been dropped from this copy of the file.
234 ASM_REGISTER_STATE_CHECK(
235 full_itxfm_(input_block_, output_block_ref_, stride_, bit_depth_));
236 ASM_REGISTER_STATE_CHECK(
237 partial_itxfm_(input_block_, output_block_, stride_, bit_depth_));
238 ASSERT_EQ(0, memcmp(output_block_ref_, output_block_,
239 pixel_size_ * output_block_size_))
240 << "Error: partial inverse transform produces different results";
// Verifies the "add" part of idct-and-add: constant coefficients (10) must be
// accumulated into identical pre-filled output by both implementations.
244 TEST_P(PartialIDctTest, AddOutputBlock) {
245 for (int i = 0; i < kCountTestBlock; ++i) {
247 for (int j = 0; j < last_nonzero_; ++j) {
248 input_block_[vp9_default_scan_orders[tx_size_].scan[j]] = 10;
251 ASM_REGISTER_STATE_CHECK(
252 full_itxfm_(input_block_, output_block_ref_, stride_, bit_depth_));
253 ASM_REGISTER_STATE_CHECK(
254 partial_itxfm_(input_block_, output_block_, stride_, bit_depth_));
255 ASSERT_EQ(0, memcmp(output_block_ref_, output_block_,
256 pixel_size_ * output_block_size_))
257 << "Error: Transform results are not correctly added to output.";
// Places a single extreme coefficient (max, then min, per the supported
// range queried above) at each scan position and compares both transforms.
261 TEST_P(PartialIDctTest, SingleExtremeCoeff) {
262 const int16_t max_coeff = MaxSupportedCoeff(partial_itxfm_);
263 const int16_t min_coeff = MinSupportedCoeff(partial_itxfm_);
264 for (int i = 0; i < last_nonzero_; ++i) {
265 memset(input_block_, 0, sizeof(*input_block_) * input_block_size_);
266 // Run once for min and once for max.
267 for (int j = 0; j < 2; ++j) {
268 const int coeff = j ? min_coeff : max_coeff;
// Zero both output buffers so only the transform result is compared.
270 memset(output_block_, 0, pixel_size_ * output_block_size_);
271 memset(output_block_ref_, 0, pixel_size_ * output_block_size_);
272 input_block_[vp9_default_scan_orders[tx_size_].scan[i]] = coeff;
274 ASM_REGISTER_STATE_CHECK(
275 full_itxfm_(input_block_, output_block_ref_, stride_, bit_depth_));
276 ASM_REGISTER_STATE_CHECK(
277 partial_itxfm_(input_block_, output_block_, stride_, bit_depth_));
278 ASSERT_EQ(0, memcmp(output_block_ref_, output_block_,
279 pixel_size_ * output_block_size_))
280 << "Error: Fails with single coeff of " << coeff << " at " << i
// Benchmark (disabled by default): times the partial idct over many
// iterations, printing elapsed ms, then sanity-checks the final output.
286 TEST_P(PartialIDctTest, DISABLED_Speed) {
287 // Keep runtime stable with transform size.
288 const int kCountSpeedTestBlock = 500000000 / input_block_size_;
// Warm the reference output once per iteration (untimed).
292 for (int i = 0; i < kCountSpeedTestBlock; ++i) {
293 ASM_REGISTER_STATE_CHECK(
294 full_itxfm_(input_block_, output_block_ref_, stride_, bit_depth_));
296 vpx_usec_timer timer;
297 vpx_usec_timer_start(&timer);
298 for (int i = 0; i < kCountSpeedTestBlock; ++i) {
299 partial_itxfm_(input_block_, output_block_, stride_, bit_depth_);
301 libvpx_test::ClearSystemState();
302 vpx_usec_timer_mark(&timer);
303 const int elapsed_time =
304 static_cast<int>(vpx_usec_timer_elapsed(&timer) / 1000);
305 printf("idct%dx%d_%d (bitdepth %d) time: %5d ms\n", size_, size_,
306 last_nonzero_, bit_depth_, elapsed_time);
308 ASSERT_EQ(0, memcmp(output_block_ref_, output_block_,
309 pixel_size_ * output_block_size_))
310 << "Error: partial inverse transform produces different results";
313 using std::tr1::make_tuple;
// Parameter table for the plain C implementations. Each entry pairs a full
// inverse transform (reference) with the partial variant under test.
// NOTE(review): many `make_tuple(` opener lines were dropped from this copy
// of the file (entries starting directly with `&vpx_...`).
315 const PartialInvTxfmParam c_partial_idct_tests[] = {
316 #if CONFIG_VP9_HIGHBITDEPTH
318 &vpx_highbd_fdct32x32_c, &highbd_wrapper<vpx_highbd_idct32x32_1024_add_c>,
319 &highbd_wrapper<vpx_highbd_idct32x32_1024_add_c>, TX_32X32, 1024, 8, 2),
321 &vpx_highbd_fdct32x32_c, &highbd_wrapper<vpx_highbd_idct32x32_1024_add_c>,
322 &highbd_wrapper<vpx_highbd_idct32x32_1024_add_c>, TX_32X32, 1024, 10, 2),
324 &vpx_highbd_fdct32x32_c, &highbd_wrapper<vpx_highbd_idct32x32_1024_add_c>,
325 &highbd_wrapper<vpx_highbd_idct32x32_1024_add_c>, TX_32X32, 1024, 12, 2),
327 &vpx_highbd_fdct32x32_c, &highbd_wrapper<vpx_highbd_idct32x32_1024_add_c>,
328 &highbd_wrapper<vpx_highbd_idct32x32_34_add_c>, TX_32X32, 34, 8, 2),
330 &vpx_highbd_fdct32x32_c, &highbd_wrapper<vpx_highbd_idct32x32_1024_add_c>,
331 &highbd_wrapper<vpx_highbd_idct32x32_34_add_c>, TX_32X32, 34, 10, 2),
333 &vpx_highbd_fdct32x32_c, &highbd_wrapper<vpx_highbd_idct32x32_1024_add_c>,
334 &highbd_wrapper<vpx_highbd_idct32x32_34_add_c>, TX_32X32, 34, 12, 2),
335 make_tuple(&vpx_highbd_fdct32x32_c,
336 &highbd_wrapper<vpx_highbd_idct32x32_1024_add_c>,
337 &highbd_wrapper<vpx_highbd_idct32x32_1_add_c>, TX_32X32, 1, 8, 2),
338 make_tuple(&vpx_highbd_fdct32x32_c,
339 &highbd_wrapper<vpx_highbd_idct32x32_1024_add_c>,
340 &highbd_wrapper<vpx_highbd_idct32x32_1_add_c>, TX_32X32, 1, 10, 2),
341 make_tuple(&vpx_highbd_fdct32x32_c,
342 &highbd_wrapper<vpx_highbd_idct32x32_1024_add_c>,
343 &highbd_wrapper<vpx_highbd_idct32x32_1_add_c>, TX_32X32, 1, 12, 2),
345 &vpx_highbd_fdct16x16_c, &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>,
346 &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>, TX_16X16, 256, 8, 2),
348 &vpx_highbd_fdct16x16_c, &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>,
349 &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>, TX_16X16, 256, 10, 2),
351 &vpx_highbd_fdct16x16_c, &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>,
352 &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>, TX_16X16, 256, 12, 2),
354 &vpx_highbd_fdct16x16_c, &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>,
355 &highbd_wrapper<vpx_highbd_idct16x16_10_add_c>, TX_16X16, 10, 8, 2),
357 &vpx_highbd_fdct16x16_c, &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>,
358 &highbd_wrapper<vpx_highbd_idct16x16_10_add_c>, TX_16X16, 10, 10, 2),
360 &vpx_highbd_fdct16x16_c, &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>,
361 &highbd_wrapper<vpx_highbd_idct16x16_10_add_c>, TX_16X16, 10, 12, 2),
362 make_tuple(&vpx_highbd_fdct16x16_c,
363 &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>,
364 &highbd_wrapper<vpx_highbd_idct16x16_1_add_c>, TX_16X16, 1, 8, 2),
365 make_tuple(&vpx_highbd_fdct16x16_c,
366 &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>,
367 &highbd_wrapper<vpx_highbd_idct16x16_1_add_c>, TX_16X16, 1, 10, 2),
368 make_tuple(&vpx_highbd_fdct16x16_c,
369 &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>,
370 &highbd_wrapper<vpx_highbd_idct16x16_1_add_c>, TX_16X16, 1, 12, 2),
371 make_tuple(&vpx_highbd_fdct8x8_c,
372 &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>,
373 &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>, TX_8X8, 64, 8, 2),
374 make_tuple(&vpx_highbd_fdct8x8_c,
375 &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>,
376 &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>, TX_8X8, 64, 10, 2),
377 make_tuple(&vpx_highbd_fdct8x8_c,
378 &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>,
379 &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>, TX_8X8, 64, 12, 2),
380 make_tuple(&vpx_highbd_fdct8x8_c,
381 &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>,
382 &highbd_wrapper<vpx_highbd_idct8x8_12_add_c>, TX_8X8, 12, 8, 2),
383 make_tuple(&vpx_highbd_fdct8x8_c,
384 &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>,
385 &highbd_wrapper<vpx_highbd_idct8x8_12_add_c>, TX_8X8, 12, 10, 2),
386 make_tuple(&vpx_highbd_fdct8x8_c,
387 &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>,
388 &highbd_wrapper<vpx_highbd_idct8x8_12_add_c>, TX_8X8, 12, 12, 2),
389 make_tuple(&vpx_highbd_fdct8x8_c,
390 &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>,
391 &highbd_wrapper<vpx_highbd_idct8x8_1_add_c>, TX_8X8, 1, 8, 2),
392 make_tuple(&vpx_highbd_fdct8x8_c,
393 &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>,
394 &highbd_wrapper<vpx_highbd_idct8x8_1_add_c>, TX_8X8, 1, 10, 2),
395 make_tuple(&vpx_highbd_fdct8x8_c,
396 &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>,
397 &highbd_wrapper<vpx_highbd_idct8x8_1_add_c>, TX_8X8, 1, 12, 2),
398 make_tuple(&vpx_highbd_fdct4x4_c,
399 &highbd_wrapper<vpx_highbd_idct4x4_16_add_c>,
400 &highbd_wrapper<vpx_highbd_idct4x4_16_add_c>, TX_4X4, 16, 8, 2),
401 make_tuple(&vpx_highbd_fdct4x4_c,
402 &highbd_wrapper<vpx_highbd_idct4x4_16_add_c>,
403 &highbd_wrapper<vpx_highbd_idct4x4_16_add_c>, TX_4X4, 16, 10, 2),
404 make_tuple(&vpx_highbd_fdct4x4_c,
405 &highbd_wrapper<vpx_highbd_idct4x4_16_add_c>,
406 &highbd_wrapper<vpx_highbd_idct4x4_16_add_c>, TX_4X4, 16, 12, 2),
407 make_tuple(&vpx_highbd_fdct4x4_c,
408 &highbd_wrapper<vpx_highbd_idct4x4_16_add_c>,
409 &highbd_wrapper<vpx_highbd_idct4x4_1_add_c>, TX_4X4, 1, 8, 2),
410 make_tuple(&vpx_highbd_fdct4x4_c,
411 &highbd_wrapper<vpx_highbd_idct4x4_16_add_c>,
412 &highbd_wrapper<vpx_highbd_idct4x4_1_add_c>, TX_4X4, 1, 10, 2),
413 make_tuple(&vpx_highbd_fdct4x4_c,
414 &highbd_wrapper<vpx_highbd_idct4x4_16_add_c>,
415 &highbd_wrapper<vpx_highbd_idct4x4_1_add_c>, TX_4X4, 1, 12, 2),
416 #endif  // CONFIG_VP9_HIGHBITDEPTH
417 make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
418 &wrapper<vpx_idct32x32_1024_add_c>, TX_32X32, 1024, 8, 1),
419 make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
420 &wrapper<vpx_idct32x32_135_add_c>, TX_32X32, 135, 8, 1),
421 make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
422 &wrapper<vpx_idct32x32_34_add_c>, TX_32X32, 34, 8, 1),
423 make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
424 &wrapper<vpx_idct32x32_1_add_c>, TX_32X32, 1, 8, 1),
425 make_tuple(&vpx_fdct16x16_c, &wrapper<vpx_idct16x16_256_add_c>,
426 &wrapper<vpx_idct16x16_256_add_c>, TX_16X16, 256, 8, 1),
427 make_tuple(&vpx_fdct16x16_c, &wrapper<vpx_idct16x16_256_add_c>,
428 &wrapper<vpx_idct16x16_10_add_c>, TX_16X16, 10, 8, 1),
429 make_tuple(&vpx_fdct16x16_c, &wrapper<vpx_idct16x16_256_add_c>,
430 &wrapper<vpx_idct16x16_1_add_c>, TX_16X16, 1, 8, 1),
431 make_tuple(&vpx_fdct8x8_c, &wrapper<vpx_idct8x8_64_add_c>,
432 &wrapper<vpx_idct8x8_64_add_c>, TX_8X8, 64, 8, 1),
433 make_tuple(&vpx_fdct8x8_c, &wrapper<vpx_idct8x8_64_add_c>,
434 &wrapper<vpx_idct8x8_12_add_c>, TX_8X8, 12, 8, 1),
435 make_tuple(&vpx_fdct8x8_c, &wrapper<vpx_idct8x8_64_add_c>,
436 &wrapper<vpx_idct8x8_1_add_c>, TX_8X8, 1, 8, 1),
437 make_tuple(&vpx_fdct4x4_c, &wrapper<vpx_idct4x4_16_add_c>,
438 &wrapper<vpx_idct4x4_16_add_c>, TX_4X4, 16, 8, 1),
439 make_tuple(&vpx_fdct4x4_c, &wrapper<vpx_idct4x4_16_add_c>,
440 &wrapper<vpx_idct4x4_1_add_c>, TX_4X4, 1, 8, 1)
443 INSTANTIATE_TEST_CASE_P(C, PartialIDctTest,
444 ::testing::ValuesIn(c_partial_idct_tests));
446 #if HAVE_NEON && !CONFIG_EMULATE_HARDWARE
// NEON implementations, checked against the matching C reference. Entries
// starting with `&vpx_...` lost their `make_tuple(` opener in this copy.
447 const PartialInvTxfmParam neon_partial_idct_tests[] = {
448 #if CONFIG_VP9_HIGHBITDEPTH
449 make_tuple(&vpx_highbd_fdct8x8_c,
450 &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>,
451 &highbd_wrapper<vpx_highbd_idct8x8_64_add_neon>, TX_8X8, 64, 8, 2),
453 &vpx_highbd_fdct8x8_c, &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>,
454 &highbd_wrapper<vpx_highbd_idct8x8_64_add_neon>, TX_8X8, 64, 10, 2),
456 &vpx_highbd_fdct8x8_c, &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>,
457 &highbd_wrapper<vpx_highbd_idct8x8_64_add_neon>, TX_8X8, 64, 12, 2),
458 make_tuple(&vpx_highbd_fdct8x8_c,
459 &highbd_wrapper<vpx_highbd_idct8x8_12_add_c>,
460 &highbd_wrapper<vpx_highbd_idct8x8_12_add_neon>, TX_8X8, 12, 8, 2),
462 &vpx_highbd_fdct8x8_c, &highbd_wrapper<vpx_highbd_idct8x8_12_add_c>,
463 &highbd_wrapper<vpx_highbd_idct8x8_12_add_neon>, TX_8X8, 12, 10, 2),
465 &vpx_highbd_fdct8x8_c, &highbd_wrapper<vpx_highbd_idct8x8_12_add_c>,
466 &highbd_wrapper<vpx_highbd_idct8x8_12_add_neon>, TX_8X8, 12, 12, 2),
467 make_tuple(&vpx_highbd_fdct8x8_c, &highbd_wrapper<vpx_highbd_idct8x8_1_add_c>,
468 &highbd_wrapper<vpx_highbd_idct8x8_1_add_neon>, TX_8X8, 1, 8, 2),
469 make_tuple(&vpx_highbd_fdct8x8_c, &highbd_wrapper<vpx_highbd_idct8x8_1_add_c>,
470 &highbd_wrapper<vpx_highbd_idct8x8_1_add_neon>, TX_8X8, 1, 10, 2),
471 make_tuple(&vpx_highbd_fdct8x8_c, &highbd_wrapper<vpx_highbd_idct8x8_1_add_c>,
472 &highbd_wrapper<vpx_highbd_idct8x8_1_add_neon>, TX_8X8, 1, 12, 2),
473 make_tuple(&vpx_highbd_fdct4x4_c,
474 &highbd_wrapper<vpx_highbd_idct4x4_16_add_c>,
475 &highbd_wrapper<vpx_highbd_idct4x4_16_add_neon>, TX_4X4, 16, 8, 2),
477 &vpx_highbd_fdct4x4_c, &highbd_wrapper<vpx_highbd_idct4x4_16_add_c>,
478 &highbd_wrapper<vpx_highbd_idct4x4_16_add_neon>, TX_4X4, 16, 10, 2),
480 &vpx_highbd_fdct4x4_c, &highbd_wrapper<vpx_highbd_idct4x4_16_add_c>,
481 &highbd_wrapper<vpx_highbd_idct4x4_16_add_neon>, TX_4X4, 16, 12, 2),
482 make_tuple(&vpx_highbd_fdct4x4_c, &highbd_wrapper<vpx_highbd_idct4x4_1_add_c>,
483 &highbd_wrapper<vpx_highbd_idct4x4_1_add_neon>, TX_4X4, 1, 8, 2),
484 make_tuple(&vpx_highbd_fdct4x4_c, &highbd_wrapper<vpx_highbd_idct4x4_1_add_c>,
485 &highbd_wrapper<vpx_highbd_idct4x4_1_add_neon>, TX_4X4, 1, 10, 2),
486 make_tuple(&vpx_highbd_fdct4x4_c, &highbd_wrapper<vpx_highbd_idct4x4_1_add_c>,
487 &highbd_wrapper<vpx_highbd_idct4x4_1_add_neon>, TX_4X4, 1, 12, 2),
488 #endif  // CONFIG_VP9_HIGHBITDEPTH
489 make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
490 &wrapper<vpx_idct32x32_1024_add_neon>, TX_32X32, 1024, 8, 1),
491 make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
492 &wrapper<vpx_idct32x32_135_add_neon>, TX_32X32, 135, 8, 1),
493 make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
494 &wrapper<vpx_idct32x32_34_add_neon>, TX_32X32, 34, 8, 1),
495 make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
496 &wrapper<vpx_idct32x32_1_add_neon>, TX_32X32, 1, 8, 1),
497 make_tuple(&vpx_fdct16x16_c, &wrapper<vpx_idct16x16_256_add_c>,
498 &wrapper<vpx_idct16x16_256_add_neon>, TX_16X16, 256, 8, 1),
499 make_tuple(&vpx_fdct16x16_c, &wrapper<vpx_idct16x16_256_add_c>,
500 &wrapper<vpx_idct16x16_10_add_neon>, TX_16X16, 10, 8, 1),
501 make_tuple(&vpx_fdct16x16_c, &wrapper<vpx_idct16x16_256_add_c>,
502 &wrapper<vpx_idct16x16_1_add_neon>, TX_16X16, 1, 8, 1),
503 make_tuple(&vpx_fdct8x8_c, &wrapper<vpx_idct8x8_64_add_c>,
504 &wrapper<vpx_idct8x8_64_add_neon>, TX_8X8, 64, 8, 1),
505 make_tuple(&vpx_fdct8x8_c, &wrapper<vpx_idct8x8_64_add_c>,
506 &wrapper<vpx_idct8x8_12_add_neon>, TX_8X8, 12, 8, 1),
507 make_tuple(&vpx_fdct8x8_c, &wrapper<vpx_idct8x8_64_add_c>,
508 &wrapper<vpx_idct8x8_1_add_neon>, TX_8X8, 1, 8, 1),
509 make_tuple(&vpx_fdct4x4_c, &wrapper<vpx_idct4x4_16_add_c>,
510 &wrapper<vpx_idct4x4_16_add_neon>, TX_4X4, 16, 8, 1),
511 make_tuple(&vpx_fdct4x4_c, &wrapper<vpx_idct4x4_16_add_c>,
512 &wrapper<vpx_idct4x4_1_add_neon>, TX_4X4, 1, 8, 1)
515 INSTANTIATE_TEST_CASE_P(NEON, PartialIDctTest,
516 ::testing::ValuesIn(neon_partial_idct_tests));
517 #endif  // HAVE_NEON && !CONFIG_EMULATE_HARDWARE
519 #if HAVE_SSE2 && !CONFIG_EMULATE_HARDWARE
520 // 32x32_135_ is implemented using the 1024 version.
// SSE2 implementations, checked against the matching C reference. Entries
// starting with `&vpx_...` lost their `make_tuple(` opener in this copy.
521 const PartialInvTxfmParam sse2_partial_idct_tests[] = {
522 #if CONFIG_VP9_HIGHBITDEPTH
524 &vpx_highbd_fdct32x32_c, &highbd_wrapper<vpx_highbd_idct32x32_1024_add_c>,
525 &highbd_wrapper<vpx_highbd_idct32x32_1_add_sse2>, TX_32X32, 1, 8, 2),
527 &vpx_highbd_fdct32x32_c, &highbd_wrapper<vpx_highbd_idct32x32_1024_add_c>,
528 &highbd_wrapper<vpx_highbd_idct32x32_1_add_sse2>, TX_32X32, 1, 10, 2),
530 &vpx_highbd_fdct32x32_c, &highbd_wrapper<vpx_highbd_idct32x32_1024_add_c>,
531 &highbd_wrapper<vpx_highbd_idct32x32_1_add_sse2>, TX_32X32, 1, 12, 2),
533 &vpx_highbd_fdct16x16_c, &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>,
534 &highbd_wrapper<vpx_highbd_idct16x16_256_add_sse2>, TX_16X16, 256, 8, 2),
536 &vpx_highbd_fdct16x16_c, &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>,
537 &highbd_wrapper<vpx_highbd_idct16x16_256_add_sse2>, TX_16X16, 256, 10, 2),
539 &vpx_highbd_fdct16x16_c, &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>,
540 &highbd_wrapper<vpx_highbd_idct16x16_256_add_sse2>, TX_16X16, 256, 12, 2),
542 &vpx_highbd_fdct16x16_c, &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>,
543 &highbd_wrapper<vpx_highbd_idct16x16_10_add_sse2>, TX_16X16, 10, 8, 2),
545 &vpx_highbd_fdct16x16_c, &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>,
546 &highbd_wrapper<vpx_highbd_idct16x16_10_add_sse2>, TX_16X16, 10, 10, 2),
548 &vpx_highbd_fdct16x16_c, &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>,
549 &highbd_wrapper<vpx_highbd_idct16x16_10_add_sse2>, TX_16X16, 10, 12, 2),
550 make_tuple(&vpx_highbd_fdct8x8_c,
551 &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>,
552 &highbd_wrapper<vpx_highbd_idct8x8_64_add_sse2>, TX_8X8, 64, 8, 2),
554 &vpx_highbd_fdct8x8_c, &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>,
555 &highbd_wrapper<vpx_highbd_idct8x8_64_add_sse2>, TX_8X8, 64, 10, 2),
557 &vpx_highbd_fdct8x8_c, &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>,
558 &highbd_wrapper<vpx_highbd_idct8x8_64_add_sse2>, TX_8X8, 64, 12, 2),
559 make_tuple(&vpx_highbd_fdct8x8_c,
560 &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>,
561 &highbd_wrapper<vpx_highbd_idct8x8_12_add_sse2>, TX_8X8, 12, 8, 2),
563 &vpx_highbd_fdct8x8_c, &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>,
564 &highbd_wrapper<vpx_highbd_idct8x8_12_add_sse2>, TX_8X8, 12, 10, 2),
566 &vpx_highbd_fdct8x8_c, &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>,
567 &highbd_wrapper<vpx_highbd_idct8x8_12_add_sse2>, TX_8X8, 12, 12, 2),
568 make_tuple(&vpx_highbd_fdct4x4_c,
569 &highbd_wrapper<vpx_highbd_idct4x4_16_add_c>,
570 &highbd_wrapper<vpx_highbd_idct4x4_16_add_sse2>, TX_4X4, 16, 8, 2),
572 &vpx_highbd_fdct4x4_c, &highbd_wrapper<vpx_highbd_idct4x4_16_add_c>,
573 &highbd_wrapper<vpx_highbd_idct4x4_16_add_sse2>, TX_4X4, 16, 10, 2),
575 &vpx_highbd_fdct4x4_c, &highbd_wrapper<vpx_highbd_idct4x4_16_add_c>,
576 &highbd_wrapper<vpx_highbd_idct4x4_16_add_sse2>, TX_4X4, 16, 12, 2),
577 #endif  // CONFIG_VP9_HIGHBITDEPTH
578 make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
579 &wrapper<vpx_idct32x32_1024_add_sse2>, TX_32X32, 1024, 8, 1),
// The 135-coefficient case intentionally reuses the 1024 SSE2 version
// (see the comment at the top of this table).
580 make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
581 &wrapper<vpx_idct32x32_1024_add_sse2>, TX_32X32, 135, 8, 1),
582 make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
583 &wrapper<vpx_idct32x32_34_add_sse2>, TX_32X32, 34, 8, 1),
584 make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
585 &wrapper<vpx_idct32x32_1_add_sse2>, TX_32X32, 1, 8, 1),
586 make_tuple(&vpx_fdct16x16_c, &wrapper<vpx_idct16x16_256_add_c>,
587 &wrapper<vpx_idct16x16_256_add_sse2>, TX_16X16, 256, 8, 1),
588 make_tuple(&vpx_fdct16x16_c, &wrapper<vpx_idct16x16_256_add_c>,
589 &wrapper<vpx_idct16x16_10_add_sse2>, TX_16X16, 10, 8, 1),
590 make_tuple(&vpx_fdct16x16_c, &wrapper<vpx_idct16x16_256_add_c>,
591 &wrapper<vpx_idct16x16_1_add_sse2>, TX_16X16, 1, 8, 1),
592 make_tuple(&vpx_fdct8x8_c, &wrapper<vpx_idct8x8_64_add_c>,
593 &wrapper<vpx_idct8x8_64_add_sse2>, TX_8X8, 64, 8, 1),
594 make_tuple(&vpx_fdct8x8_c, &wrapper<vpx_idct8x8_64_add_c>,
595 &wrapper<vpx_idct8x8_12_add_sse2>, TX_8X8, 12, 8, 1),
596 make_tuple(&vpx_fdct8x8_c, &wrapper<vpx_idct8x8_64_add_c>,
597 &wrapper<vpx_idct8x8_1_add_sse2>, TX_8X8, 1, 8, 1),
598 make_tuple(&vpx_fdct4x4_c, &wrapper<vpx_idct4x4_16_add_c>,
599 &wrapper<vpx_idct4x4_16_add_sse2>, TX_4X4, 16, 8, 1),
600 make_tuple(&vpx_fdct4x4_c, &wrapper<vpx_idct4x4_16_add_c>,
601 &wrapper<vpx_idct4x4_1_add_sse2>, TX_4X4, 1, 8, 1)
604 INSTANTIATE_TEST_CASE_P(SSE2, PartialIDctTest,
605 ::testing::ValuesIn(sse2_partial_idct_tests));
607 #endif  // HAVE_SSE2 && !CONFIG_EMULATE_HARDWARE
609 #if HAVE_SSSE3 && ARCH_X86_64 && !CONFIG_EMULATE_HARDWARE
// SSSE3 implementations (8-bit only), checked against the C reference.
610 const PartialInvTxfmParam ssse3_partial_idct_tests[] = {
611 make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
612 &wrapper<vpx_idct32x32_1024_add_ssse3>, TX_32X32, 1024, 8, 1),
613 make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
614 &wrapper<vpx_idct32x32_135_add_ssse3>, TX_32X32, 135, 8, 1),
615 make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
616 &wrapper<vpx_idct32x32_34_add_ssse3>, TX_32X32, 34, 8, 1),
617 make_tuple(&vpx_fdct8x8_c, &wrapper<vpx_idct8x8_64_add_c>,
618 &wrapper<vpx_idct8x8_64_add_ssse3>, TX_8X8, 64, 8, 1),
619 make_tuple(&vpx_fdct8x8_c, &wrapper<vpx_idct8x8_64_add_c>,
620 &wrapper<vpx_idct8x8_12_add_ssse3>, TX_8X8, 12, 8, 1)
623 INSTANTIATE_TEST_CASE_P(SSSE3, PartialIDctTest,
624 ::testing::ValuesIn(ssse3_partial_idct_tests));
625 #endif  // HAVE_SSSE3 && ARCH_X86_64 && !CONFIG_EMULATE_HARDWARE
627 #if HAVE_DSPR2 && !CONFIG_EMULATE_HARDWARE && !CONFIG_VP9_HIGHBITDEPTH
// MIPS DSPr2 implementations (8-bit only), checked against the C reference.
// The 135-coefficient case reuses the 1024 version (no dedicated kernel).
628 const PartialInvTxfmParam dspr2_partial_idct_tests[] = {
629 make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
630 &wrapper<vpx_idct32x32_1024_add_dspr2>, TX_32X32, 1024, 8, 1),
631 make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
632 &wrapper<vpx_idct32x32_1024_add_dspr2>, TX_32X32, 135, 8, 1),
633 make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
634 &wrapper<vpx_idct32x32_34_add_dspr2>, TX_32X32, 34, 8, 1),
635 make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
636 &wrapper<vpx_idct32x32_1_add_dspr2>, TX_32X32, 1, 8, 1),
637 make_tuple(&vpx_fdct16x16_c, &wrapper<vpx_idct16x16_256_add_c>,
638 &wrapper<vpx_idct16x16_256_add_dspr2>, TX_16X16, 256, 8, 1),
639 make_tuple(&vpx_fdct16x16_c, &wrapper<vpx_idct16x16_256_add_c>,
640 &wrapper<vpx_idct16x16_10_add_dspr2>, TX_16X16, 10, 8, 1),
641 make_tuple(&vpx_fdct16x16_c, &wrapper<vpx_idct16x16_256_add_c>,
642 &wrapper<vpx_idct16x16_1_add_dspr2>, TX_16X16, 1, 8, 1),
643 make_tuple(&vpx_fdct8x8_c, &wrapper<vpx_idct8x8_64_add_c>,
644 &wrapper<vpx_idct8x8_64_add_dspr2>, TX_8X8, 64, 8, 1),
645 make_tuple(&vpx_fdct8x8_c, &wrapper<vpx_idct8x8_64_add_c>,
646 &wrapper<vpx_idct8x8_12_add_dspr2>, TX_8X8, 12, 8, 1),
647 make_tuple(&vpx_fdct8x8_c, &wrapper<vpx_idct8x8_64_add_c>,
648 &wrapper<vpx_idct8x8_1_add_dspr2>, TX_8X8, 1, 8, 1),
649 make_tuple(&vpx_fdct4x4_c, &wrapper<vpx_idct4x4_16_add_c>,
650 &wrapper<vpx_idct4x4_16_add_dspr2>, TX_4X4, 16, 8, 1),
651 make_tuple(&vpx_fdct4x4_c, &wrapper<vpx_idct4x4_16_add_c>,
652 &wrapper<vpx_idct4x4_1_add_dspr2>, TX_4X4, 1, 8, 1)
655 INSTANTIATE_TEST_CASE_P(DSPR2, PartialIDctTest,
656 ::testing::ValuesIn(dspr2_partial_idct_tests));
657 #endif  // HAVE_DSPR2 && !CONFIG_EMULATE_HARDWARE && !CONFIG_VP9_HIGHBITDEPTH
659 #if HAVE_MSA && !CONFIG_EMULATE_HARDWARE && !CONFIG_VP9_HIGHBITDEPTH
660 // 32x32_135_ is implemented using the 1024 version.
// MIPS MSA implementations (8-bit only), checked against the C reference.
661 const PartialInvTxfmParam msa_partial_idct_tests[] = {
662 make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
663 &wrapper<vpx_idct32x32_1024_add_msa>, TX_32X32, 1024, 8, 1),
664 make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
665 &wrapper<vpx_idct32x32_1024_add_msa>, TX_32X32, 135, 8, 1),
666 make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
667 &wrapper<vpx_idct32x32_34_add_msa>, TX_32X32, 34, 8, 1),
668 make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
669 &wrapper<vpx_idct32x32_1_add_msa>, TX_32X32, 1, 8, 1),
670 make_tuple(&vpx_fdct16x16_c, &wrapper<vpx_idct16x16_256_add_c>,
671 &wrapper<vpx_idct16x16_256_add_msa>, TX_16X16, 256, 8, 1),
672 make_tuple(&vpx_fdct16x16_c, &wrapper<vpx_idct16x16_256_add_c>,
673 &wrapper<vpx_idct16x16_10_add_msa>, TX_16X16, 10, 8, 1),
674 make_tuple(&vpx_fdct16x16_c, &wrapper<vpx_idct16x16_256_add_c>,
675 &wrapper<vpx_idct16x16_1_add_msa>, TX_16X16, 1, 8, 1),
676 make_tuple(&vpx_fdct8x8_c, &wrapper<vpx_idct8x8_64_add_c>,
677 &wrapper<vpx_idct8x8_64_add_msa>, TX_8X8, 64, 8, 1),
678 make_tuple(&vpx_fdct8x8_c, &wrapper<vpx_idct8x8_64_add_c>,
679 &wrapper<vpx_idct8x8_12_add_msa>, TX_8X8, 12, 8, 1),
680 make_tuple(&vpx_fdct8x8_c, &wrapper<vpx_idct8x8_64_add_c>,
681 &wrapper<vpx_idct8x8_1_add_msa>, TX_8X8, 1, 8, 1),
682 make_tuple(&vpx_fdct4x4_c, &wrapper<vpx_idct4x4_16_add_c>,
683 &wrapper<vpx_idct4x4_16_add_msa>, TX_4X4, 16, 8, 1),
684 make_tuple(&vpx_fdct4x4_c, &wrapper<vpx_idct4x4_16_add_c>,
685 &wrapper<vpx_idct4x4_1_add_msa>, TX_4X4, 1, 8, 1)
688 INSTANTIATE_TEST_CASE_P(MSA, PartialIDctTest,
689 ::testing::ValuesIn(msa_partial_idct_tests));
690 #endif  // HAVE_MSA && !CONFIG_EMULATE_HARDWARE && !CONFIG_VP9_HIGHBITDEPTH