/*
 *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
#include <math.h>
#include <stdlib.h>
#include <string.h>

#include "third_party/googletest/src/include/gtest/gtest.h"

#include "./vp9_rtcd.h"
#include "./vpx_dsp_rtcd.h"
#include "test/acm_random.h"
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
#include "test/util.h"
#include "vp9/common/vp9_entropy.h"
#include "vp9/common/vp9_scan.h"
#include "vpx/vpx_codec.h"
#include "vpx/vpx_integer.h"
#include "vpx_ports/mem.h"
29 using libvpx_test::ACMRandom;
33 const int kNumCoeffs = 64;
34 const double kPi = 3.141592653589793238462643383279502884;
36 const int kSignBiasMaxDiff255 = 1500;
37 const int kSignBiasMaxDiff15 = 10000;
39 typedef void (*FdctFunc)(const int16_t *in, tran_low_t *out, int stride);
40 typedef void (*IdctFunc)(const tran_low_t *in, uint8_t *out, int stride);
41 typedef void (*FhtFunc)(const int16_t *in, tran_low_t *out, int stride,
43 typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
46 typedef std::tr1::tuple<FdctFunc, IdctFunc, int, vpx_bit_depth_t> Dct8x8Param;
47 typedef std::tr1::tuple<FhtFunc, IhtFunc, int, vpx_bit_depth_t> Ht8x8Param;
48 typedef std::tr1::tuple<IdctFunc, IdctFunc, int, vpx_bit_depth_t> Idct8x8Param;
// Double-precision reference 1-D type-II DCT.
// The accumulator is zeroed before summing (the output buffer may hold
// garbage on entry) and only the DC basis (k == 0) is weighted by
// 1/sqrt(2), per the standard DCT-II normalization.
// |stride| is unused; it is kept so the signature matches the other 1-D
// reference transforms used by these tests.
void reference_8x8_dct_1d(const double in[8], double out[8], int stride) {
  const double kLocalPi = 3.141592653589793238462643383279502884;
  const double kInvSqrt2 = 0.707106781186547524400844362104;
  (void)stride;
  for (int k = 0; k < 8; k++) {
    out[k] = 0.0;
    for (int n = 0; n < 8; n++)
      out[k] += in[n] * cos(kLocalPi * (2 * n + 1) * k / 16.0);
    if (k == 0)
      out[k] = out[k] * kInvSqrt2;
  }
}
61 void reference_8x8_dct_2d(const int16_t input[kNumCoeffs],
62 double output[kNumCoeffs]) {
63 // First transform columns
64 for (int i = 0; i < 8; ++i) {
65 double temp_in[8], temp_out[8];
66 for (int j = 0; j < 8; ++j)
67 temp_in[j] = input[j*8 + i];
68 reference_8x8_dct_1d(temp_in, temp_out, 1);
69 for (int j = 0; j < 8; ++j)
70 output[j * 8 + i] = temp_out[j];
72 // Then transform rows
73 for (int i = 0; i < 8; ++i) {
74 double temp_in[8], temp_out[8];
75 for (int j = 0; j < 8; ++j)
76 temp_in[j] = output[j + i*8];
77 reference_8x8_dct_1d(temp_in, temp_out, 1);
78 // Scale by some magic number
79 for (int j = 0; j < 8; ++j)
80 output[j + i * 8] = temp_out[j] * 2;
85 void fdct8x8_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) {
86 vpx_fdct8x8_c(in, out, stride);
89 void fht8x8_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) {
90 vp9_fht8x8_c(in, out, stride, tx_type);
93 #if CONFIG_VP9_HIGHBITDEPTH
94 void idct8x8_10(const tran_low_t *in, uint8_t *out, int stride) {
95 vpx_highbd_idct8x8_64_add_c(in, out, stride, 10);
98 void idct8x8_12(const tran_low_t *in, uint8_t *out, int stride) {
99 vpx_highbd_idct8x8_64_add_c(in, out, stride, 12);
102 void iht8x8_10(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
103 vp9_highbd_iht8x8_64_add_c(in, out, stride, tx_type, 10);
106 void iht8x8_12(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
107 vp9_highbd_iht8x8_64_add_c(in, out, stride, tx_type, 12);
110 void idct8x8_10_add_10_c(const tran_low_t *in, uint8_t *out, int stride) {
111 vpx_highbd_idct8x8_10_add_c(in, out, stride, 10);
114 void idct8x8_10_add_12_c(const tran_low_t *in, uint8_t *out, int stride) {
115 vpx_highbd_idct8x8_10_add_c(in, out, stride, 12);
119 void idct8x8_10_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
120 vpx_highbd_idct8x8_10_add_sse2(in, out, stride, 10);
123 void idct8x8_10_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
124 vpx_highbd_idct8x8_10_add_sse2(in, out, stride, 12);
127 void idct8x8_64_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
128 vpx_highbd_idct8x8_64_add_sse2(in, out, stride, 10);
131 void idct8x8_64_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
132 vpx_highbd_idct8x8_64_add_sse2(in, out, stride, 12);
135 #endif // CONFIG_VP9_HIGHBITDEPTH
137 class FwdTrans8x8TestBase {
139 virtual ~FwdTrans8x8TestBase() {}
142 virtual void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) = 0;
143 virtual void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) = 0;
145 void RunSignBiasCheck() {
146 ACMRandom rnd(ACMRandom::DeterministicSeed());
147 DECLARE_ALIGNED(16, int16_t, test_input_block[64]);
148 DECLARE_ALIGNED(16, tran_low_t, test_output_block[64]);
149 int count_sign_block[64][2];
150 const int count_test_block = 100000;
152 memset(count_sign_block, 0, sizeof(count_sign_block));
154 for (int i = 0; i < count_test_block; ++i) {
155 // Initialize a test block with input range [-255, 255].
156 for (int j = 0; j < 64; ++j)
157 test_input_block[j] = ((rnd.Rand16() >> (16 - bit_depth_)) & mask_) -
158 ((rnd.Rand16() >> (16 - bit_depth_)) & mask_);
159 ASM_REGISTER_STATE_CHECK(
160 RunFwdTxfm(test_input_block, test_output_block, pitch_));
162 for (int j = 0; j < 64; ++j) {
163 if (test_output_block[j] < 0)
164 ++count_sign_block[j][0];
165 else if (test_output_block[j] > 0)
166 ++count_sign_block[j][1];
170 for (int j = 0; j < 64; ++j) {
171 const int diff = abs(count_sign_block[j][0] - count_sign_block[j][1]);
172 const int max_diff = kSignBiasMaxDiff255;
173 EXPECT_LT(diff, max_diff << (bit_depth_ - 8))
174 << "Error: 8x8 FDCT/FHT has a sign bias > "
175 << 1. * max_diff / count_test_block * 100 << "%"
176 << " for input range [-255, 255] at index " << j
177 << " count0: " << count_sign_block[j][0]
178 << " count1: " << count_sign_block[j][1]
179 << " diff: " << diff;
182 memset(count_sign_block, 0, sizeof(count_sign_block));
184 for (int i = 0; i < count_test_block; ++i) {
185 // Initialize a test block with input range [-mask_ / 16, mask_ / 16].
186 for (int j = 0; j < 64; ++j)
187 test_input_block[j] = ((rnd.Rand16() & mask_) >> 4) -
188 ((rnd.Rand16() & mask_) >> 4);
189 ASM_REGISTER_STATE_CHECK(
190 RunFwdTxfm(test_input_block, test_output_block, pitch_));
192 for (int j = 0; j < 64; ++j) {
193 if (test_output_block[j] < 0)
194 ++count_sign_block[j][0];
195 else if (test_output_block[j] > 0)
196 ++count_sign_block[j][1];
200 for (int j = 0; j < 64; ++j) {
201 const int diff = abs(count_sign_block[j][0] - count_sign_block[j][1]);
202 const int max_diff = kSignBiasMaxDiff15;
203 EXPECT_LT(diff, max_diff << (bit_depth_ - 8))
204 << "Error: 8x8 FDCT/FHT has a sign bias > "
205 << 1. * max_diff / count_test_block * 100 << "%"
206 << " for input range [-15, 15] at index " << j
207 << " count0: " << count_sign_block[j][0]
208 << " count1: " << count_sign_block[j][1]
209 << " diff: " << diff;
213 void RunRoundTripErrorCheck() {
214 ACMRandom rnd(ACMRandom::DeterministicSeed());
217 const int count_test_block = 100000;
218 DECLARE_ALIGNED(16, int16_t, test_input_block[64]);
219 DECLARE_ALIGNED(16, tran_low_t, test_temp_block[64]);
220 DECLARE_ALIGNED(16, uint8_t, dst[64]);
221 DECLARE_ALIGNED(16, uint8_t, src[64]);
222 #if CONFIG_VP9_HIGHBITDEPTH
223 DECLARE_ALIGNED(16, uint16_t, dst16[64]);
224 DECLARE_ALIGNED(16, uint16_t, src16[64]);
227 for (int i = 0; i < count_test_block; ++i) {
228 // Initialize a test block with input range [-mask_, mask_].
229 for (int j = 0; j < 64; ++j) {
230 if (bit_depth_ == VPX_BITS_8) {
231 src[j] = rnd.Rand8();
232 dst[j] = rnd.Rand8();
233 test_input_block[j] = src[j] - dst[j];
234 #if CONFIG_VP9_HIGHBITDEPTH
236 src16[j] = rnd.Rand16() & mask_;
237 dst16[j] = rnd.Rand16() & mask_;
238 test_input_block[j] = src16[j] - dst16[j];
243 ASM_REGISTER_STATE_CHECK(
244 RunFwdTxfm(test_input_block, test_temp_block, pitch_));
245 for (int j = 0; j < 64; ++j) {
246 if (test_temp_block[j] > 0) {
247 test_temp_block[j] += 2;
248 test_temp_block[j] /= 4;
249 test_temp_block[j] *= 4;
251 test_temp_block[j] -= 2;
252 test_temp_block[j] /= 4;
253 test_temp_block[j] *= 4;
256 if (bit_depth_ == VPX_BITS_8) {
257 ASM_REGISTER_STATE_CHECK(
258 RunInvTxfm(test_temp_block, dst, pitch_));
259 #if CONFIG_VP9_HIGHBITDEPTH
261 ASM_REGISTER_STATE_CHECK(
262 RunInvTxfm(test_temp_block, CONVERT_TO_BYTEPTR(dst16), pitch_));
266 for (int j = 0; j < 64; ++j) {
267 #if CONFIG_VP9_HIGHBITDEPTH
269 bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
271 const int diff = dst[j] - src[j];
273 const int error = diff * diff;
274 if (max_error < error)
276 total_error += error;
280 EXPECT_GE(1 << 2 * (bit_depth_ - 8), max_error)
281 << "Error: 8x8 FDCT/IDCT or FHT/IHT has an individual"
282 << " roundtrip error > 1";
284 EXPECT_GE((count_test_block << 2 * (bit_depth_ - 8))/5, total_error)
285 << "Error: 8x8 FDCT/IDCT or FHT/IHT has average roundtrip "
286 << "error > 1/5 per block";
289 void RunExtremalCheck() {
290 ACMRandom rnd(ACMRandom::DeterministicSeed());
293 int total_coeff_error = 0;
294 const int count_test_block = 100000;
295 DECLARE_ALIGNED(16, int16_t, test_input_block[64]);
296 DECLARE_ALIGNED(16, tran_low_t, test_temp_block[64]);
297 DECLARE_ALIGNED(16, tran_low_t, ref_temp_block[64]);
298 DECLARE_ALIGNED(16, uint8_t, dst[64]);
299 DECLARE_ALIGNED(16, uint8_t, src[64]);
300 #if CONFIG_VP9_HIGHBITDEPTH
301 DECLARE_ALIGNED(16, uint16_t, dst16[64]);
302 DECLARE_ALIGNED(16, uint16_t, src16[64]);
305 for (int i = 0; i < count_test_block; ++i) {
306 // Initialize a test block with input range [-mask_, mask_].
307 for (int j = 0; j < 64; ++j) {
308 if (bit_depth_ == VPX_BITS_8) {
316 src[j] = rnd.Rand8() % 2 ? 255 : 0;
317 dst[j] = rnd.Rand8() % 2 ? 255 : 0;
319 test_input_block[j] = src[j] - dst[j];
320 #if CONFIG_VP9_HIGHBITDEPTH
329 src16[j] = rnd.Rand8() % 2 ? mask_ : 0;
330 dst16[j] = rnd.Rand8() % 2 ? mask_ : 0;
332 test_input_block[j] = src16[j] - dst16[j];
337 ASM_REGISTER_STATE_CHECK(
338 RunFwdTxfm(test_input_block, test_temp_block, pitch_));
339 ASM_REGISTER_STATE_CHECK(
340 fwd_txfm_ref(test_input_block, ref_temp_block, pitch_, tx_type_));
341 if (bit_depth_ == VPX_BITS_8) {
342 ASM_REGISTER_STATE_CHECK(
343 RunInvTxfm(test_temp_block, dst, pitch_));
344 #if CONFIG_VP9_HIGHBITDEPTH
346 ASM_REGISTER_STATE_CHECK(
347 RunInvTxfm(test_temp_block, CONVERT_TO_BYTEPTR(dst16), pitch_));
351 for (int j = 0; j < 64; ++j) {
352 #if CONFIG_VP9_HIGHBITDEPTH
354 bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
356 const int diff = dst[j] - src[j];
358 const int error = diff * diff;
359 if (max_error < error)
361 total_error += error;
363 const int coeff_diff = test_temp_block[j] - ref_temp_block[j];
364 total_coeff_error += abs(coeff_diff);
367 EXPECT_GE(1 << 2 * (bit_depth_ - 8), max_error)
368 << "Error: Extremal 8x8 FDCT/IDCT or FHT/IHT has"
369 << "an individual roundtrip error > 1";
371 EXPECT_GE((count_test_block << 2 * (bit_depth_ - 8))/5, total_error)
372 << "Error: Extremal 8x8 FDCT/IDCT or FHT/IHT has average"
373 << " roundtrip error > 1/5 per block";
375 EXPECT_EQ(0, total_coeff_error)
376 << "Error: Extremal 8x8 FDCT/FHT has"
377 << "overflow issues in the intermediate steps > 1";
381 void RunInvAccuracyCheck() {
382 ACMRandom rnd(ACMRandom::DeterministicSeed());
383 const int count_test_block = 1000;
384 DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]);
385 DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
386 DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
387 DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
388 #if CONFIG_VP9_HIGHBITDEPTH
389 DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
390 DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
393 for (int i = 0; i < count_test_block; ++i) {
394 double out_r[kNumCoeffs];
396 // Initialize a test block with input range [-255, 255].
397 for (int j = 0; j < kNumCoeffs; ++j) {
398 if (bit_depth_ == VPX_BITS_8) {
399 src[j] = rnd.Rand8() % 2 ? 255 : 0;
400 dst[j] = src[j] > 0 ? 0 : 255;
401 in[j] = src[j] - dst[j];
402 #if CONFIG_VP9_HIGHBITDEPTH
404 src16[j] = rnd.Rand8() % 2 ? mask_ : 0;
405 dst16[j] = src16[j] > 0 ? 0 : mask_;
406 in[j] = src16[j] - dst16[j];
411 reference_8x8_dct_2d(in, out_r);
412 for (int j = 0; j < kNumCoeffs; ++j)
413 coeff[j] = static_cast<tran_low_t>(round(out_r[j]));
415 if (bit_depth_ == VPX_BITS_8) {
416 ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_));
417 #if CONFIG_VP9_HIGHBITDEPTH
419 ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16),
424 for (int j = 0; j < kNumCoeffs; ++j) {
425 #if CONFIG_VP9_HIGHBITDEPTH
426 const uint32_t diff =
427 bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
429 const uint32_t diff = dst[j] - src[j];
431 const uint32_t error = diff * diff;
432 EXPECT_GE(1u << 2 * (bit_depth_ - 8), error)
433 << "Error: 8x8 IDCT has error " << error
434 << " at index " << j;
439 void RunFwdAccuracyCheck() {
440 ACMRandom rnd(ACMRandom::DeterministicSeed());
441 const int count_test_block = 1000;
442 DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]);
443 DECLARE_ALIGNED(16, tran_low_t, coeff_r[kNumCoeffs]);
444 DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
446 for (int i = 0; i < count_test_block; ++i) {
447 double out_r[kNumCoeffs];
449 // Initialize a test block with input range [-mask_, mask_].
450 for (int j = 0; j < kNumCoeffs; ++j)
451 in[j] = rnd.Rand8() % 2 == 0 ? mask_ : -mask_;
453 RunFwdTxfm(in, coeff, pitch_);
454 reference_8x8_dct_2d(in, out_r);
455 for (int j = 0; j < kNumCoeffs; ++j)
456 coeff_r[j] = static_cast<tran_low_t>(round(out_r[j]));
458 for (int j = 0; j < kNumCoeffs; ++j) {
459 const uint32_t diff = coeff[j] - coeff_r[j];
460 const uint32_t error = diff * diff;
461 EXPECT_GE(9u << 2 * (bit_depth_ - 8), error)
462 << "Error: 8x8 DCT has error " << error
463 << " at index " << j;
468 void CompareInvReference(IdctFunc ref_txfm, int thresh) {
469 ACMRandom rnd(ACMRandom::DeterministicSeed());
470 const int count_test_block = 10000;
472 DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
473 DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
474 DECLARE_ALIGNED(16, uint8_t, ref[kNumCoeffs]);
475 #if CONFIG_VP9_HIGHBITDEPTH
476 DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
477 DECLARE_ALIGNED(16, uint16_t, ref16[kNumCoeffs]);
479 const int16_t *scan = vp9_default_scan_orders[TX_8X8].scan;
481 for (int i = 0; i < count_test_block; ++i) {
482 for (int j = 0; j < kNumCoeffs; ++j) {
484 // Random values less than the threshold, either positive or negative
485 coeff[scan[j]] = rnd(thresh) * (1-2*(i%2));
489 if (bit_depth_ == VPX_BITS_8) {
492 #if CONFIG_VP9_HIGHBITDEPTH
499 if (bit_depth_ == VPX_BITS_8) {
500 ref_txfm(coeff, ref, pitch_);
501 ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_));
502 #if CONFIG_VP9_HIGHBITDEPTH
504 ref_txfm(coeff, CONVERT_TO_BYTEPTR(ref16), pitch_);
505 ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16),
510 for (int j = 0; j < kNumCoeffs; ++j) {
511 #if CONFIG_VP9_HIGHBITDEPTH
512 const uint32_t diff =
513 bit_depth_ == VPX_BITS_8 ? dst[j] - ref[j] : dst16[j] - ref16[j];
515 const uint32_t diff = dst[j] - ref[j];
517 const uint32_t error = diff * diff;
519 << "Error: 8x8 IDCT has error " << error
520 << " at index " << j;
526 FhtFunc fwd_txfm_ref;
527 vpx_bit_depth_t bit_depth_;
532 : public FwdTrans8x8TestBase,
533 public ::testing::TestWithParam<Dct8x8Param> {
535 virtual ~FwdTrans8x8DCT() {}
537 virtual void SetUp() {
538 fwd_txfm_ = GET_PARAM(0);
539 inv_txfm_ = GET_PARAM(1);
540 tx_type_ = GET_PARAM(2);
542 fwd_txfm_ref = fdct8x8_ref;
543 bit_depth_ = GET_PARAM(3);
544 mask_ = (1 << bit_depth_) - 1;
547 virtual void TearDown() { libvpx_test::ClearSystemState(); }
550 void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) {
551 fwd_txfm_(in, out, stride);
553 void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
554 inv_txfm_(out, dst, stride);
561 TEST_P(FwdTrans8x8DCT, SignBiasCheck) {
565 TEST_P(FwdTrans8x8DCT, RoundTripErrorCheck) {
566 RunRoundTripErrorCheck();
569 TEST_P(FwdTrans8x8DCT, ExtremalCheck) {
573 TEST_P(FwdTrans8x8DCT, FwdAccuracyCheck) {
574 RunFwdAccuracyCheck();
577 TEST_P(FwdTrans8x8DCT, InvAccuracyCheck) {
578 RunInvAccuracyCheck();
582 : public FwdTrans8x8TestBase,
583 public ::testing::TestWithParam<Ht8x8Param> {
585 virtual ~FwdTrans8x8HT() {}
587 virtual void SetUp() {
588 fwd_txfm_ = GET_PARAM(0);
589 inv_txfm_ = GET_PARAM(1);
590 tx_type_ = GET_PARAM(2);
592 fwd_txfm_ref = fht8x8_ref;
593 bit_depth_ = GET_PARAM(3);
594 mask_ = (1 << bit_depth_) - 1;
597 virtual void TearDown() { libvpx_test::ClearSystemState(); }
600 void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) {
601 fwd_txfm_(in, out, stride, tx_type_);
603 void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
604 inv_txfm_(out, dst, stride, tx_type_);
611 TEST_P(FwdTrans8x8HT, SignBiasCheck) {
615 TEST_P(FwdTrans8x8HT, RoundTripErrorCheck) {
616 RunRoundTripErrorCheck();
619 TEST_P(FwdTrans8x8HT, ExtremalCheck) {
624 : public FwdTrans8x8TestBase,
625 public ::testing::TestWithParam<Idct8x8Param> {
627 virtual ~InvTrans8x8DCT() {}
629 virtual void SetUp() {
630 ref_txfm_ = GET_PARAM(0);
631 inv_txfm_ = GET_PARAM(1);
632 thresh_ = GET_PARAM(2);
634 bit_depth_ = GET_PARAM(3);
635 mask_ = (1 << bit_depth_) - 1;
638 virtual void TearDown() { libvpx_test::ClearSystemState(); }
641 void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
642 inv_txfm_(out, dst, stride);
644 void RunFwdTxfm(int16_t *out, tran_low_t *dst, int stride) {}
651 TEST_P(InvTrans8x8DCT, CompareReference) {
652 CompareInvReference(ref_txfm_, thresh_);
655 using std::tr1::make_tuple;
657 #if CONFIG_VP9_HIGHBITDEPTH
658 INSTANTIATE_TEST_CASE_P(
661 make_tuple(&vpx_fdct8x8_c, &vpx_idct8x8_64_add_c, 0, VPX_BITS_8),
662 make_tuple(&vpx_highbd_fdct8x8_c, &idct8x8_10, 0, VPX_BITS_10),
663 make_tuple(&vpx_highbd_fdct8x8_c, &idct8x8_12, 0, VPX_BITS_12)));
665 INSTANTIATE_TEST_CASE_P(
668 make_tuple(&vpx_fdct8x8_c, &vpx_idct8x8_64_add_c, 0, VPX_BITS_8)));
669 #endif // CONFIG_VP9_HIGHBITDEPTH
671 #if CONFIG_VP9_HIGHBITDEPTH
672 INSTANTIATE_TEST_CASE_P(
675 make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 0, VPX_BITS_8),
676 make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_10, 0, VPX_BITS_10),
677 make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_10, 1, VPX_BITS_10),
678 make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_10, 2, VPX_BITS_10),
679 make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_10, 3, VPX_BITS_10),
680 make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_12, 0, VPX_BITS_12),
681 make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_12, 1, VPX_BITS_12),
682 make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_12, 2, VPX_BITS_12),
683 make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_12, 3, VPX_BITS_12),
684 make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 1, VPX_BITS_8),
685 make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 2, VPX_BITS_8),
686 make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 3, VPX_BITS_8)));
688 INSTANTIATE_TEST_CASE_P(
691 make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 0, VPX_BITS_8),
692 make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 1, VPX_BITS_8),
693 make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 2, VPX_BITS_8),
694 make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 3, VPX_BITS_8)));
695 #endif // CONFIG_VP9_HIGHBITDEPTH
#if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
    NEON, FwdTrans8x8DCT,
    ::testing::Values(
        make_tuple(&vpx_fdct8x8_neon, &vpx_idct8x8_64_add_neon, 0,
                   VPX_BITS_8)));
#endif  // HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

#if HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
    NEON, FwdTrans8x8HT,
    ::testing::Values(
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 0, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 1, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 2, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 3, VPX_BITS_8)));
#endif  // HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
    SSE2, FwdTrans8x8DCT,
    ::testing::Values(
        make_tuple(&vpx_fdct8x8_sse2, &vpx_idct8x8_64_add_sse2, 0,
                   VPX_BITS_8)));
INSTANTIATE_TEST_CASE_P(
    SSE2, FwdTrans8x8HT,
    ::testing::Values(
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 0, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 1, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 2, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 3, VPX_BITS_8)));
#endif  // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
#if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
    SSE2, FwdTrans8x8DCT,
    ::testing::Values(
        make_tuple(&vpx_fdct8x8_sse2, &vpx_idct8x8_64_add_c, 0, VPX_BITS_8),
        make_tuple(&vpx_highbd_fdct8x8_c,
                   &idct8x8_64_add_10_sse2, 12, VPX_BITS_10),
        make_tuple(&vpx_highbd_fdct8x8_sse2,
                   &idct8x8_64_add_10_sse2, 12, VPX_BITS_10),
        make_tuple(&vpx_highbd_fdct8x8_c,
                   &idct8x8_64_add_12_sse2, 12, VPX_BITS_12),
        make_tuple(&vpx_highbd_fdct8x8_sse2,
                   &idct8x8_64_add_12_sse2, 12, VPX_BITS_12)));

INSTANTIATE_TEST_CASE_P(
    SSE2, FwdTrans8x8HT,
    ::testing::Values(
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_c, 0, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_c, 1, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_c, 2, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_c, 3, VPX_BITS_8)));

// Optimizations take effect at a threshold of 6201, so we use a value close to
// that to test both branches.
INSTANTIATE_TEST_CASE_P(
    SSE2, InvTrans8x8DCT,
    ::testing::Values(
        make_tuple(&idct8x8_10_add_10_c,
                   &idct8x8_10_add_10_sse2, 6225, VPX_BITS_10),
        make_tuple(&idct8x8_10,
                   &idct8x8_64_add_10_sse2, 6225, VPX_BITS_10),
        make_tuple(&idct8x8_10_add_12_c,
                   &idct8x8_10_add_12_sse2, 6225, VPX_BITS_12),
        make_tuple(&idct8x8_12,
                   &idct8x8_64_add_12_sse2, 6225, VPX_BITS_12)));
#endif  // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
#if HAVE_SSSE3 && CONFIG_USE_X86INC && ARCH_X86_64 && \
    !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
    SSSE3, FwdTrans8x8DCT,
    ::testing::Values(
        make_tuple(&vpx_fdct8x8_ssse3, &vpx_idct8x8_64_add_ssse3, 0,
                   VPX_BITS_8)));
#endif  // HAVE_SSSE3 && CONFIG_USE_X86INC && ARCH_X86_64 &&
        // !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

#if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
    MSA, FwdTrans8x8DCT,
    ::testing::Values(
        make_tuple(&vpx_fdct8x8_msa, &vpx_idct8x8_64_add_msa, 0, VPX_BITS_8)));
INSTANTIATE_TEST_CASE_P(
    MSA, FwdTrans8x8HT,
    ::testing::Values(
        make_tuple(&vp9_fht8x8_msa, &vp9_iht8x8_64_add_msa, 0, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_msa, &vp9_iht8x8_64_add_msa, 1, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_msa, &vp9_iht8x8_64_add_msa, 2, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_msa, &vp9_iht8x8_64_add_msa, 3, VPX_BITS_8)));
#endif  // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE