2 * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
15 #include "third_party/googletest/src/include/gtest/gtest.h"
17 #include "./vp9_rtcd.h"
18 #include "./vpx_dsp_rtcd.h"
19 #include "test/acm_random.h"
20 #include "test/clear_system_state.h"
21 #include "test/register_state_check.h"
22 #include "test/util.h"
23 #include "vp9/common/vp9_entropy.h"
24 #include "vpx/vpx_codec.h"
25 #include "vpx/vpx_integer.h"
26 #include "vpx_ports/mem.h"
28 using libvpx_test::ACMRandom;
// Number of samples/coefficients in one transform block used by these tests.
31 const int kNumCoeffs = 16;
// Function-pointer types for the transforms under test. FdctFunc/IdctFunc take
// (input, output, stride); FhtFunc/IhtFunc carry an additional trailing
// argument (the tests pass the transform type there).
32 typedef void (*FdctFunc)(const int16_t *in, tran_low_t *out, int stride);
33 typedef void (*IdctFunc)(const tran_low_t *in, uint8_t *out, int stride);
34 typedef void (*FhtFunc)(const int16_t *in, tran_low_t *out, int stride,
36 typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
// Test parameter tuples. The fixtures read them as
// (forward transform, inverse transform, transform type, bit depth).
39 typedef std::tr1::tuple<FdctFunc, IdctFunc, int, vpx_bit_depth_t> Dct4x4Param;
40 typedef std::tr1::tuple<FhtFunc, IhtFunc, int, vpx_bit_depth_t> Ht4x4Param;
// Reference forward 4x4 DCT: hands in/out/stride to the C implementation
// vpx_fdct4x4_c. Trans4x4DCT::SetUp installs it as fwd_txfm_ref.
42 void fdct4x4_ref(const int16_t *in, tran_low_t *out, int stride,
44 vpx_fdct4x4_c(in, out, stride);
// Reference forward 4x4 hybrid transform: forwards all four arguments,
// including tx_type, to the C implementation vp9_fht4x4_c.
// Trans4x4HT::SetUp installs it as fwd_txfm_ref.
47 void fht4x4_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) {
48 vp9_fht4x4_c(in, out, stride, tx_type);
// Reference forward 4x4 Walsh-Hadamard transform: hands in/out/stride to the C
// implementation vp9_fwht4x4_c. Trans4x4WHT::SetUp installs it as
// fwd_txfm_ref.
51 void fwht4x4_ref(const int16_t *in, tran_low_t *out, int stride,
53 vp9_fwht4x4_c(in, out, stride);
56 #if CONFIG_VP9_HIGHBITDEPTH
// Adapts vpx_highbd_idct4x4_16_add_c to the three-argument IdctFunc shape by
// fixing its bit-depth argument to 10.
57 void idct4x4_10(const tran_low_t *in, uint8_t *out, int stride) {
58 vpx_highbd_idct4x4_16_add_c(in, out, stride, 10);
// Adapts vpx_highbd_idct4x4_16_add_c to the three-argument IdctFunc shape by
// fixing its bit-depth argument to 12.
61 void idct4x4_12(const tran_low_t *in, uint8_t *out, int stride) {
62 vpx_highbd_idct4x4_16_add_c(in, out, stride, 12);
// Adapts vp9_highbd_iht4x4_16_add_c to the four-argument inverse hybrid
// transform shape by fixing its bit-depth argument to 10; tx_type is passed
// through unchanged.
65 void iht4x4_10(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
66 vp9_highbd_iht4x4_16_add_c(in, out, stride, tx_type, 10);
// Adapts vp9_highbd_iht4x4_16_add_c to the four-argument inverse hybrid
// transform shape by fixing its bit-depth argument to 12; tx_type is passed
// through unchanged.
69 void iht4x4_12(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
70 vp9_highbd_iht4x4_16_add_c(in, out, stride, tx_type, 12);
// Adapts vpx_highbd_iwht4x4_16_add_c to the three-argument IdctFunc shape by
// fixing its bit-depth argument to 10.
73 void iwht4x4_10(const tran_low_t *in, uint8_t *out, int stride) {
74 vpx_highbd_iwht4x4_16_add_c(in, out, stride, 10);
// Adapts vpx_highbd_iwht4x4_16_add_c to the three-argument IdctFunc shape by
// fixing its bit-depth argument to 12.
77 void iwht4x4_12(const tran_low_t *in, uint8_t *out, int stride) {
78 vpx_highbd_iwht4x4_16_add_c(in, out, stride, 12);
// Adapts vpx_highbd_idct4x4_16_add_sse2 to the three-argument IdctFunc shape
// by fixing its bit-depth argument to 10.
82 void idct4x4_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
83 vpx_highbd_idct4x4_16_add_sse2(in, out, stride, 10);
// Adapts vpx_highbd_idct4x4_16_add_sse2 to the three-argument IdctFunc shape
// by fixing its bit-depth argument to 12.
86 void idct4x4_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
87 vpx_highbd_idct4x4_16_add_sse2(in, out, stride, 12);
90 #endif // CONFIG_VP9_HIGHBITDEPTH
// Common test logic for the 4x4 forward/inverse transform tests. Derived
// fixtures supply the transforms under test through RunFwdTxfm/RunInvTxfm and
// assign fwd_txfm_ref, bit_depth_, tx_type_ and mask_ in their SetUp. pitch_ is
// passed as the stride argument of every transform call made here.
92 class Trans4x4TestBase {
94 virtual ~Trans4x4TestBase() {}
// Forward transform under test: reads residuals from `in` and writes
// coefficients to `out`.
97 virtual void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) = 0;
// Inverse transform under test: reads coefficients from `out` and updates the
// pixel buffer `dst`.
99 virtual void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) = 0;
// Round-trip check. For each of count_test_block random blocks the residual
// src - dst is forward transformed, the result is inverse transformed onto
// dst, and dst is compared with src. Expects every per-sample squared error to
// be <= limit and the summed squared error to be <= count_test_block * limit.
101 void RunAccuracyCheck(int limit) {
102 ACMRandom rnd(ACMRandom::DeterministicSeed());
103 uint32_t max_error = 0;
104 int64_t total_error = 0;
105 const int count_test_block = 10000;
106 for (int i = 0; i < count_test_block; ++i) {
107 DECLARE_ALIGNED(16, int16_t, test_input_block[kNumCoeffs]);
108 DECLARE_ALIGNED(16, tran_low_t, test_temp_block[kNumCoeffs]);
109 DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
110 DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
// 16-bit sample buffers used instead of src/dst when bit_depth_ is not 8.
111 #if CONFIG_VP9_HIGHBITDEPTH
112 DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
113 DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
116 // Initialize a test block with input range [-255, 255] (8-bit) or [-mask_, mask_].
117 for (int j = 0; j < kNumCoeffs; ++j) {
118 if (bit_depth_ == VPX_BITS_8) {
119 src[j] = rnd.Rand8();
120 dst[j] = rnd.Rand8();
121 test_input_block[j] = src[j] - dst[j];
122 #if CONFIG_VP9_HIGHBITDEPTH
124 src16[j] = rnd.Rand16() & mask_;
125 dst16[j] = rnd.Rand16() & mask_;
126 test_input_block[j] = src16[j] - dst16[j];
// Forward transform, then inverse transform onto the matching dst buffer. Both
// calls are wrapped in ASM_REGISTER_STATE_CHECK.
131 ASM_REGISTER_STATE_CHECK(
132 RunFwdTxfm(test_input_block, test_temp_block, pitch_));
133 if (bit_depth_ == VPX_BITS_8) {
134 ASM_REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst, pitch_));
135 #if CONFIG_VP9_HIGHBITDEPTH
137 ASM_REGISTER_STATE_CHECK(
138 RunInvTxfm(test_temp_block, CONVERT_TO_BYTEPTR(dst16), pitch_));
// Accumulate the squared reconstruction error of each sample.
142 for (int j = 0; j < kNumCoeffs; ++j) {
143 #if CONFIG_VP9_HIGHBITDEPTH
145 bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
147 ASSERT_EQ(VPX_BITS_8, bit_depth_);
148 const int diff = dst[j] - src[j];
150 const uint32_t error = diff * diff;
151 if (max_error < error) max_error = error;
152 total_error += error;
156 EXPECT_GE(static_cast<uint32_t>(limit), max_error)
157 << "Error: 4x4 FHT/IHT has an individual round trip error > " << limit;
159 EXPECT_GE(count_test_block * limit, total_error)
160 << "Error: 4x4 FHT/IHT has average round trip error > " << limit
// Coefficient check: on count_test_block random blocks, the forward transform
// under test must produce exactly the same coefficients as fwd_txfm_ref.
164 void RunCoeffCheck() {
165 ACMRandom rnd(ACMRandom::DeterministicSeed());
166 const int count_test_block = 5000;
167 DECLARE_ALIGNED(16, int16_t, input_block[kNumCoeffs]);
168 DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
169 DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);
171 for (int i = 0; i < count_test_block; ++i) {
172 // Initialize a test block with input range [-mask_, mask_].
173 for (int j = 0; j < kNumCoeffs; ++j) {
174 input_block[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
177 fwd_txfm_ref(input_block, output_ref_block, pitch_, tx_type_);
178 ASM_REGISTER_STATE_CHECK(RunFwdTxfm(input_block, output_block, pitch_));
180 // The minimum quant value is 4.
181 for (int j = 0; j < kNumCoeffs; ++j)
182 EXPECT_EQ(output_block[j], output_ref_block[j]);
// Extreme-value check: feeds blocks whose samples are all +/-mask_ to both the
// reference and the forward transform under test, requiring identical
// coefficients and a bounded coefficient magnitude.
187 ACMRandom rnd(ACMRandom::DeterministicSeed());
188 const int count_test_block = 5000;
189 DECLARE_ALIGNED(16, int16_t, input_extreme_block[kNumCoeffs]);
190 DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
191 DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);
193 for (int i = 0; i < count_test_block; ++i) {
194 // Initialize a test block with extreme values: each sample is mask_ or -mask_.
195 for (int j = 0; j < kNumCoeffs; ++j) {
196 input_extreme_block[j] = rnd.Rand8() % 2 ? mask_ : -mask_;
// Uniform fills: every sample mask_, or every sample -mask_.
199 for (int j = 0; j < kNumCoeffs; ++j) input_extreme_block[j] = mask_;
201 for (int j = 0; j < kNumCoeffs; ++j) input_extreme_block[j] = -mask_;
204 fwd_txfm_ref(input_extreme_block, output_ref_block, pitch_, tx_type_);
205 ASM_REGISTER_STATE_CHECK(
206 RunFwdTxfm(input_extreme_block, output_block, pitch_));
208 // The minimum quant value is 4.
209 for (int j = 0; j < kNumCoeffs; ++j) {
210 EXPECT_EQ(output_block[j], output_ref_block[j]);
// Bound is 4 * DCT_MAX_VALUE shifted left by (bit_depth_ - 8) bits.
211 EXPECT_GE(4 * DCT_MAX_VALUE << (bit_depth_ - 8), abs(output_block[j]))
212 << "Error: 4x4 FDCT has coefficient larger than 4*DCT_MAX_VALUE";
// Inverse-only accuracy check. Coefficients are produced by the reference
// forward transform (fwd_txfm_ref) from the residual src - dst; the inverse
// transform under test is then applied onto dst, and every per-sample squared
// difference between dst and src must be <= limit.
217 void RunInvAccuracyCheck(int limit) {
218 ACMRandom rnd(ACMRandom::DeterministicSeed());
219 const int count_test_block = 1000;
220 DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]);
221 DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
222 DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
223 DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
224 #if CONFIG_VP9_HIGHBITDEPTH
225 DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
226 DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
229 for (int i = 0; i < count_test_block; ++i) {
230 // Initialize a test block with input range [-mask_, mask_].
231 for (int j = 0; j < kNumCoeffs; ++j) {
232 if (bit_depth_ == VPX_BITS_8) {
233 src[j] = rnd.Rand8();
234 dst[j] = rnd.Rand8();
235 in[j] = src[j] - dst[j];
236 #if CONFIG_VP9_HIGHBITDEPTH
238 src16[j] = rnd.Rand16() & mask_;
239 dst16[j] = rnd.Rand16() & mask_;
240 in[j] = src16[j] - dst16[j];
245 fwd_txfm_ref(in, coeff, pitch_, tx_type_);
247 if (bit_depth_ == VPX_BITS_8) {
248 ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_));
249 #if CONFIG_VP9_HIGHBITDEPTH
251 ASM_REGISTER_STATE_CHECK(
252 RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16), pitch_));
256 for (int j = 0; j < kNumCoeffs; ++j) {
257 #if CONFIG_VP9_HIGHBITDEPTH
259 bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
261 const int diff = dst[j] - src[j];
263 const uint32_t error = diff * diff;
264 EXPECT_GE(static_cast<uint32_t>(limit), error)
265 << "Error: 4x4 IDCT has error " << error << " at index " << j;
// Reference forward transform used by the coefficient and inverse checks.
272 FhtFunc fwd_txfm_ref;
// Bit depth of the samples being tested; selects the 8-bit or 16-bit buffers.
273 vpx_bit_depth_t bit_depth_;
// Parameterized fixture for the 4x4 DCT. Parameters are
// (forward DCT, inverse DCT, transform type, bit depth).
277 class Trans4x4DCT : public Trans4x4TestBase,
278 public ::testing::TestWithParam<Dct4x4Param> {
280 virtual ~Trans4x4DCT() {}
// Unpacks the test parameters and selects fdct4x4_ref as the reference forward
// transform. mask_ has the low bit_depth_ bits set.
282 virtual void SetUp() {
283 fwd_txfm_ = GET_PARAM(0);
284 inv_txfm_ = GET_PARAM(1);
285 tx_type_ = GET_PARAM(2);
287 fwd_txfm_ref = fdct4x4_ref;
288 bit_depth_ = GET_PARAM(3);
289 mask_ = (1 << bit_depth_) - 1;
291 virtual void TearDown() { libvpx_test::ClearSystemState(); }
// Forward/inverse hooks: call the parameterized functions; tx_type_ is not
// passed to either.
294 void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) {
295 fwd_txfm_(in, out, stride);
297 void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) {
298 inv_txfm_(out, dst, stride);
// Trans4x4DCT test cases. Both accuracy checks allow a per-sample squared
// error of at most 1.
305 TEST_P(Trans4x4DCT, AccuracyCheck) { RunAccuracyCheck(1); }
307 TEST_P(Trans4x4DCT, CoeffCheck) { RunCoeffCheck(); }
309 TEST_P(Trans4x4DCT, MemCheck) { RunMemCheck(); }
311 TEST_P(Trans4x4DCT, InvAccuracyCheck) { RunInvAccuracyCheck(1); }
// Parameterized fixture for the 4x4 hybrid transform. Parameters are
// (forward transform, inverse transform, transform type, bit depth).
313 class Trans4x4HT : public Trans4x4TestBase,
314 public ::testing::TestWithParam<Ht4x4Param> {
316 virtual ~Trans4x4HT() {}
// Unpacks the test parameters and selects fht4x4_ref as the reference forward
// transform. mask_ has the low bit_depth_ bits set.
318 virtual void SetUp() {
319 fwd_txfm_ = GET_PARAM(0);
320 inv_txfm_ = GET_PARAM(1);
321 tx_type_ = GET_PARAM(2);
323 fwd_txfm_ref = fht4x4_ref;
324 bit_depth_ = GET_PARAM(3);
325 mask_ = (1 << bit_depth_) - 1;
327 virtual void TearDown() { libvpx_test::ClearSystemState(); }
// Forward/inverse hooks: call the parameterized functions, passing tx_type_ as
// the extra trailing argument.
330 void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) {
331 fwd_txfm_(in, out, stride, tx_type_);
334 void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) {
335 inv_txfm_(out, dst, stride, tx_type_);
// Trans4x4HT test cases. Both accuracy checks allow a per-sample squared error
// of at most 1.
342 TEST_P(Trans4x4HT, AccuracyCheck) { RunAccuracyCheck(1); }
344 TEST_P(Trans4x4HT, CoeffCheck) { RunCoeffCheck(); }
346 TEST_P(Trans4x4HT, MemCheck) { RunMemCheck(); }
348 TEST_P(Trans4x4HT, InvAccuracyCheck) { RunInvAccuracyCheck(1); }
// Parameterized fixture for the 4x4 Walsh-Hadamard transform. It reuses the
// Dct4x4Param tuple: (forward transform, inverse transform, transform type,
// bit depth).
350 class Trans4x4WHT : public Trans4x4TestBase,
351 public ::testing::TestWithParam<Dct4x4Param> {
353 virtual ~Trans4x4WHT() {}
// Unpacks the test parameters and selects fwht4x4_ref as the reference forward
// transform. mask_ has the low bit_depth_ bits set.
355 virtual void SetUp() {
356 fwd_txfm_ = GET_PARAM(0);
357 inv_txfm_ = GET_PARAM(1);
358 tx_type_ = GET_PARAM(2);
360 fwd_txfm_ref = fwht4x4_ref;
361 bit_depth_ = GET_PARAM(3);
362 mask_ = (1 << bit_depth_) - 1;
364 virtual void TearDown() { libvpx_test::ClearSystemState(); }
// Forward/inverse hooks: call the parameterized functions; tx_type_ is not
// passed to either.
367 void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) {
368 fwd_txfm_(in, out, stride);
370 void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) {
371 inv_txfm_(out, dst, stride);
// Trans4x4WHT test cases. Both accuracy checks use a limit of 0, so the round
// trip must reconstruct every sample exactly.
378 TEST_P(Trans4x4WHT, AccuracyCheck) { RunAccuracyCheck(0); }
380 TEST_P(Trans4x4WHT, CoeffCheck) { RunCoeffCheck(); }
382 TEST_P(Trans4x4WHT, MemCheck) { RunMemCheck(); }
384 TEST_P(Trans4x4WHT, InvAccuracyCheck) { RunInvAccuracyCheck(0); }
385 using std::tr1::make_tuple;
// C-implementation DCT instantiations. The first list covers 10- and 12-bit
// (through the idct4x4_10/idct4x4_12 adapters) as well as 8-bit; the second
// lists only the 8-bit pair.
// NOTE(review): presumably the two lists are alternatives selected by
// CONFIG_VP9_HIGHBITDEPTH, and the first also targets Trans4x4DCT - confirm.
387 #if CONFIG_VP9_HIGHBITDEPTH
388 INSTANTIATE_TEST_CASE_P(
391 make_tuple(&vpx_highbd_fdct4x4_c, &idct4x4_10, 0, VPX_BITS_10),
392 make_tuple(&vpx_highbd_fdct4x4_c, &idct4x4_12, 0, VPX_BITS_12),
393 make_tuple(&vpx_fdct4x4_c, &vpx_idct4x4_16_add_c, 0, VPX_BITS_8)));
395 INSTANTIATE_TEST_CASE_P(C, Trans4x4DCT,
396 ::testing::Values(make_tuple(&vpx_fdct4x4_c,
397 &vpx_idct4x4_16_add_c, 0,
399 #endif // CONFIG_VP9_HIGHBITDEPTH
// C-implementation hybrid-transform instantiations: one tuple per transform
// type 0..3 for each bit depth listed. The first list covers 10-, 12- and
// 8-bit; the second covers 8-bit only.
// NOTE(review): presumably both target Trans4x4HT and are alternatives
// selected by CONFIG_VP9_HIGHBITDEPTH - confirm.
401 #if CONFIG_VP9_HIGHBITDEPTH
402 INSTANTIATE_TEST_CASE_P(
405 make_tuple(&vp9_highbd_fht4x4_c, &iht4x4_10, 0, VPX_BITS_10),
406 make_tuple(&vp9_highbd_fht4x4_c, &iht4x4_10, 1, VPX_BITS_10),
407 make_tuple(&vp9_highbd_fht4x4_c, &iht4x4_10, 2, VPX_BITS_10),
408 make_tuple(&vp9_highbd_fht4x4_c, &iht4x4_10, 3, VPX_BITS_10),
409 make_tuple(&vp9_highbd_fht4x4_c, &iht4x4_12, 0, VPX_BITS_12),
410 make_tuple(&vp9_highbd_fht4x4_c, &iht4x4_12, 1, VPX_BITS_12),
411 make_tuple(&vp9_highbd_fht4x4_c, &iht4x4_12, 2, VPX_BITS_12),
412 make_tuple(&vp9_highbd_fht4x4_c, &iht4x4_12, 3, VPX_BITS_12),
413 make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 0, VPX_BITS_8),
414 make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 1, VPX_BITS_8),
415 make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 2, VPX_BITS_8),
416 make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 3, VPX_BITS_8)));
418 INSTANTIATE_TEST_CASE_P(
421 make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 0, VPX_BITS_8),
422 make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 1, VPX_BITS_8),
423 make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 2, VPX_BITS_8),
424 make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 3, VPX_BITS_8)));
425 #endif // CONFIG_VP9_HIGHBITDEPTH
// C-implementation Walsh-Hadamard instantiations. The first list covers 10-
// and 12-bit (through the iwht4x4_10/iwht4x4_12 adapters) as well as 8-bit;
// the second lists only the 8-bit pair.
// NOTE(review): presumably the two lists are alternatives selected by
// CONFIG_VP9_HIGHBITDEPTH, and the first also targets Trans4x4WHT - confirm.
427 #if CONFIG_VP9_HIGHBITDEPTH
428 INSTANTIATE_TEST_CASE_P(
431 make_tuple(&vp9_highbd_fwht4x4_c, &iwht4x4_10, 0, VPX_BITS_10),
432 make_tuple(&vp9_highbd_fwht4x4_c, &iwht4x4_12, 0, VPX_BITS_12),
433 make_tuple(&vp9_fwht4x4_c, &vpx_iwht4x4_16_add_c, 0, VPX_BITS_8)));
435 INSTANTIATE_TEST_CASE_P(C, Trans4x4WHT,
436 ::testing::Values(make_tuple(&vp9_fwht4x4_c,
437 &vpx_iwht4x4_16_add_c, 0,
439 #endif // CONFIG_VP9_HIGHBITDEPTH
// NEON instantiations, built only when NEON is available and hardware
// emulation is off. Every tuple pairs a C forward transform with a NEON
// inverse transform, so only the NEON inverse routines are exercised. The
// hybrid-transform list is further limited to builds without high bit depth.
441 #if HAVE_NEON && !CONFIG_EMULATE_HARDWARE
442 INSTANTIATE_TEST_CASE_P(NEON, Trans4x4DCT,
443 ::testing::Values(make_tuple(&vpx_fdct4x4_c,
444 &vpx_idct4x4_16_add_neon,
446 #if !CONFIG_VP9_HIGHBITDEPTH
447 INSTANTIATE_TEST_CASE_P(
450 make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 0, VPX_BITS_8),
451 make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 1, VPX_BITS_8),
452 make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 2, VPX_BITS_8),
453 make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 3, VPX_BITS_8)));
454 #endif // !CONFIG_VP9_HIGHBITDEPTH
455 #endif // HAVE_NEON && !CONFIG_EMULATE_HARDWARE
// SSE2 Walsh-Hadamard instantiation (8-bit). Each tuple pairs one SSE2 routine
// with the C routine for the opposite direction: SSE2 forward with C inverse,
// then C forward with SSE2 inverse.
457 #if HAVE_SSE2 && !CONFIG_EMULATE_HARDWARE
458 INSTANTIATE_TEST_CASE_P(
461 make_tuple(&vp9_fwht4x4_sse2, &vpx_iwht4x4_16_add_c, 0, VPX_BITS_8),
462 make_tuple(&vp9_fwht4x4_c, &vpx_iwht4x4_16_add_sse2, 0, VPX_BITS_8)));
// SSE2 instantiations for builds without high bit depth: SSE2 forward and
// SSE2 inverse are paired for the DCT and for hybrid-transform types 0..3,
// all at 8-bit.
465 #if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
466 INSTANTIATE_TEST_CASE_P(SSE2, Trans4x4DCT,
467 ::testing::Values(make_tuple(&vpx_fdct4x4_sse2,
468 &vpx_idct4x4_16_add_sse2,
470 INSTANTIATE_TEST_CASE_P(
473 make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 0, VPX_BITS_8),
474 make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 1, VPX_BITS_8),
475 make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 2, VPX_BITS_8),
476 make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 3, VPX_BITS_8)));
477 #endif // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
// SSE2 instantiations for high-bit-depth builds. At 10 and 12 bits the SSE2
// inverse adapters are paired with both the C and the SSE2 forward DCT. The
// 8-bit DCT and hybrid-transform tuples pair an SSE2 forward transform with a
// C inverse transform.
479 #if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
480 INSTANTIATE_TEST_CASE_P(
483 make_tuple(&vpx_highbd_fdct4x4_c, &idct4x4_10_sse2, 0, VPX_BITS_10),
484 make_tuple(&vpx_highbd_fdct4x4_sse2, &idct4x4_10_sse2, 0, VPX_BITS_10),
485 make_tuple(&vpx_highbd_fdct4x4_c, &idct4x4_12_sse2, 0, VPX_BITS_12),
486 make_tuple(&vpx_highbd_fdct4x4_sse2, &idct4x4_12_sse2, 0, VPX_BITS_12),
487 make_tuple(&vpx_fdct4x4_sse2, &vpx_idct4x4_16_add_c, 0, VPX_BITS_8)));
489 INSTANTIATE_TEST_CASE_P(
492 make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_c, 0, VPX_BITS_8),
493 make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_c, 1, VPX_BITS_8),
494 make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_c, 2, VPX_BITS_8),
495 make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_c, 3, VPX_BITS_8)));
496 #endif // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
// MSA instantiations for builds without high bit depth: MSA forward and MSA
// inverse are paired for the DCT and for hybrid-transform types 0..3.
498 #if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
499 INSTANTIATE_TEST_CASE_P(MSA, Trans4x4DCT,
500 ::testing::Values(make_tuple(&vpx_fdct4x4_msa,
501 &vpx_idct4x4_16_add_msa, 0,
503 INSTANTIATE_TEST_CASE_P(
506 make_tuple(&vp9_fht4x4_msa, &vp9_iht4x4_16_add_msa, 0, VPX_BITS_8),
507 make_tuple(&vp9_fht4x4_msa, &vp9_iht4x4_16_add_msa, 1, VPX_BITS_8),
508 make_tuple(&vp9_fht4x4_msa, &vp9_iht4x4_16_add_msa, 2, VPX_BITS_8),
509 make_tuple(&vp9_fht4x4_msa, &vp9_iht4x4_16_add_msa, 3, VPX_BITS_8)));
510 #endif // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE