#include "third_party/googletest/src/include/gtest/gtest.h"
#include "./vpx_dsp_rtcd.h"
+#include "vpx_ports/vpx_timer.h"
#include "test/acm_random.h"
#include "test/register_state_check.h"
using ::libvpx_test::ACMRandom;
-typedef void (*HadamardFunc)(const int16_t *a, int a_stride, tran_low_t *b);
+typedef void (*HadamardFunc)(const int16_t *a, ptrdiff_t a_stride,
+ tran_low_t *b);  // Signature of the Hadamard functions under test; the stride parameter is now ptrdiff_t instead of int.
-void hadamard_loop(const int16_t *a, int a_stride, int16_t *out) {
- int16_t b[8];
+void hadamard_loop(const tran_low_t *a, tran_low_t *out) {
+ tran_low_t b[8];
for (int i = 0; i < 8; i += 2) {
- b[i + 0] = a[i * a_stride] + a[(i + 1) * a_stride];
- b[i + 1] = a[i * a_stride] - a[(i + 1) * a_stride];
+ b[i + 0] = a[i * 8] + a[(i + 1) * 8];
+ b[i + 1] = a[i * 8] - a[(i + 1) * 8];
}
- int16_t c[8];
+ tran_low_t c[8];
for (int i = 0; i < 8; i += 4) {
c[i + 0] = b[i + 0] + b[i + 2];
c[i + 1] = b[i + 1] + b[i + 3];
}
void reference_hadamard8x8(const int16_t *a, int a_stride, tran_low_t *b) {
- int16_t buf[64];
- int16_t buf2[64];
- for (int i = 0; i < 8; ++i) hadamard_loop(a + i, a_stride, buf + i * 8);
- for (int i = 0; i < 8; ++i) hadamard_loop(buf + i, 8, buf2 + i * 8);
-
- for (int i = 0; i < 64; ++i) b[i] = (tran_low_t)buf2[i];
+ tran_low_t input[64];  // The 8x8 source block, widened to tran_low_t and repacked with a fixed stride of 8.
+ tran_low_t buf[64];  // Output of the first pass, input to the second.
+ for (int i = 0; i < 8; ++i) {
+ for (int j = 0; j < 8; ++j) {
+ input[i * 8 + j] = static_cast<tran_low_t>(a[i * a_stride + j]);  // a_stride is consumed here, so hadamard_loop no longer needs it.
+ }
+ }
+ for (int i = 0; i < 8; ++i) hadamard_loop(input + i, buf + i * 8);  // First pass: call i starts at column offset i of input and fills buf[i * 8 ...].
+ for (int i = 0; i < 8; ++i) hadamard_loop(buf + i, b + i * 8);  // Second pass over buf, written straight into the caller's output b.
}
void reference_hadamard16x16(const int16_t *a, int a_stride, tran_low_t *b) {
}
}
-class HadamardTestBase : public ::testing::TestWithParam<HadamardFunc> {
- public:
- virtual void SetUp() {
- h_func_ = GetParam();
- rnd_.Reset(ACMRandom::DeterministicSeed());
- }
+void reference_hadamard32x32(const int16_t *a, int a_stride, tran_low_t *b) {  // 32x32 reference: four 16x16 reference transforms followed by a combining stage.
+ reference_hadamard16x16(a + 0 + 0 * a_stride, a_stride, b + 0);  // Source at column 0, row 0; output starts at b + 0.
+ reference_hadamard16x16(a + 16 + 0 * a_stride, a_stride, b + 256);  // Source at column 16, row 0; output starts at b + 256.
+ reference_hadamard16x16(a + 0 + 16 * a_stride, a_stride, b + 512);  // Source at column 0, row 16; output starts at b + 512.
+ reference_hadamard16x16(a + 16 + 16 * a_stride, a_stride, b + 768);  // Source at column 16, row 16; output starts at b + 768.
- protected:
- HadamardFunc h_func_;
- ACMRandom rnd_;
-};
+ for (int i = 0; i < 256; ++i) {  // One iteration per coefficient position; i is only a counter, b itself is advanced below.
+ const tran_low_t a0 = b[0];
+ const tran_low_t a1 = b[256];
+ const tran_low_t a2 = b[512];
+ const tran_low_t a3 = b[768];
-class Hadamard8x8Test : public HadamardTestBase {};
+ const tran_low_t b0 = (a0 + a1) >> 2;  // Each pairwise sum/difference is shifted right by 2 before the final stage.
+ const tran_low_t b1 = (a0 - a1) >> 2;
+ const tran_low_t b2 = (a2 + a3) >> 2;
+ const tran_low_t b3 = (a2 - a3) >> 2;
-TEST_P(Hadamard8x8Test, CompareReferenceRandom) {
- DECLARE_ALIGNED(16, int16_t, a[64]);
- DECLARE_ALIGNED(16, tran_low_t, b[64]);
- tran_low_t b_ref[64];
- for (int i = 0; i < 64; ++i) {
- a[i] = rnd_.Rand9Signed();
+ b[0] = b0 + b2;  // Results overwrite the same four positions that were read above.
+ b[256] = b1 + b3;
+ b[512] = b0 - b2;
+ b[768] = b1 - b3;
+
+ ++b;  // Step to the next position, so b[0]/b[256]/b[512]/b[768] address the next coefficient of each 256-entry section.
}
- memset(b, 0, sizeof(b));
- memset(b_ref, 0, sizeof(b_ref));
+}
- reference_hadamard8x8(a, 8, b_ref);
- ASM_REGISTER_STATE_CHECK(h_func_(a, 8, b));
+struct HadamardFuncWithSize {  // Test parameter: a function under test paired with the block size it handles.
+ HadamardFuncWithSize(HadamardFunc f, int s) : func(f), block_size(s) {}
+ HadamardFunc func;  // Implementation to exercise.
+ int block_size;  // Width (== height) of the square block; the fixture squares it to get the coefficient count.
+};
- // The order of the output is not important. Sort before checking.
- std::sort(b, b + 64);
- std::sort(b_ref, b_ref + 64);
- EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b)));
+std::ostream &operator<<(std::ostream &os, const HadamardFuncWithSize &hfs) {  // Streams only the block size; the function pointer is not printed. Presumably used by googletest when it prints the test parameter.
+ return os << "block size: " << hfs.block_size;
}
-TEST_P(Hadamard8x8Test, VaryStride) {
- DECLARE_ALIGNED(16, int16_t, a[64 * 8]);
- DECLARE_ALIGNED(16, tran_low_t, b[64]);
- tran_low_t b_ref[64];
- for (int i = 0; i < 64 * 8; ++i) {
- a[i] = rnd_.Rand9Signed();
+class HadamardTestBase : public ::testing::TestWithParam<HadamardFuncWithSize> {  // Shared fixture: compares the parameter's function against the reference transforms.
+ public:
+ virtual void SetUp() {
+ h_func_ = GetParam().func;
+ bwh_ = GetParam().block_size;  // Block width/height.
+ block_size_ = bwh_ * bwh_;  // Number of coefficients in one block.
+ rnd_.Reset(ACMRandom::DeterministicSeed());  // Reseed from ACMRandom's deterministic seed before each test.
+ }
+
+ virtual int16_t Rand() = 0;  // Supplies one input sample; each subclass picks the generator.
+
+ void ReferenceHadamard(const int16_t *a, int a_stride, tran_low_t *b,  // Dispatches to the reference transform that matches bwh.
+ int bwh) {
+ if (bwh == 32)
+ reference_hadamard32x32(a, a_stride, b);
+ else if (bwh == 16)
+ reference_hadamard16x16(a, a_stride, b);
+ else  // Any size other than 32 or 16 falls through to the 8x8 reference.
+ reference_hadamard8x8(a, a_stride, b);
}
- for (int i = 8; i < 64; i += 8) {
+ void CompareReferenceRandom() {  // Checks h_func_ against the reference on one random block with stride equal to bwh_.
+ const int kMaxBlockSize = 32 * 32;  // Buffers are sized for a 32x32 block regardless of bwh_.
+ DECLARE_ALIGNED(16, int16_t, a[kMaxBlockSize]);
+ DECLARE_ALIGNED(16, tran_low_t, b[kMaxBlockSize]);
+ memset(a, 0, sizeof(a));
memset(b, 0, sizeof(b));
+
+ tran_low_t b_ref[kMaxBlockSize];
memset(b_ref, 0, sizeof(b_ref));
- reference_hadamard8x8(a, i, b_ref);
- ASM_REGISTER_STATE_CHECK(h_func_(a, i, b));
+ for (int i = 0; i < block_size_; ++i) a[i] = Rand();  // Only the first block_size_ samples are randomised; the rest of a stays zero.
+
+ ReferenceHadamard(a, bwh_, b_ref, bwh_);
+ ASM_REGISTER_STATE_CHECK(h_func_(a, bwh_, b));
// The order of the output is not important. Sort before checking.
- std::sort(b, b + 64);
- std::sort(b_ref, b_ref + 64);
+ std::sort(b, b + block_size_);
+ std::sort(b_ref, b_ref + block_size_);  // Only the first block_size_ entries are sorted; the memcmp that follows spans the whole zero-initialised buffers.
EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b)));
}
+
+ void VaryStride() {  // Repeats the reference comparison for input strides 8, 16, ..., 56.
+ const int kMaxBlockSize = 32 * 32;
+ DECLARE_ALIGNED(16, int16_t, a[kMaxBlockSize * 8]);  // Eight times the maximum block so that strided reads stay inside the array.
+ DECLARE_ALIGNED(16, tran_low_t, b[kMaxBlockSize]);
+ memset(a, 0, sizeof(a));
+ for (int i = 0; i < block_size_ * 8; ++i) a[i] = Rand();  // Randomise block_size_ * 8 samples; anything beyond that stays zero.
+
+ tran_low_t b_ref[kMaxBlockSize];
+ for (int i = 8; i < 64; i += 8) {  // i is the stride handed to both implementations; when it is below bwh_, consecutive rows overlap in a.
+ memset(b, 0, sizeof(b));  // Clear both outputs so results from the previous stride cannot leak into this comparison.
+ memset(b_ref, 0, sizeof(b_ref));
+
+ ReferenceHadamard(a, i, b_ref, bwh_);
+ ASM_REGISTER_STATE_CHECK(h_func_(a, i, b));
+
+ // The order of the output is not important. Sort before checking.
+ std::sort(b, b + block_size_);
+ std::sort(b_ref, b_ref + block_size_);
+ EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b)));
+ }
+ }
+
+ void SpeedTest(int times) {  // Times `times` back-to-back calls of h_func_ and prints the elapsed microseconds.
+ const int kMaxBlockSize = 32 * 32;
+ DECLARE_ALIGNED(16, int16_t, input[kMaxBlockSize]);
+ DECLARE_ALIGNED(16, tran_low_t, output[kMaxBlockSize]);
+ memset(input, 1, sizeof(input));  // Sets every byte to 1, so each int16_t sample is 0x0101.
+ memset(output, 0, sizeof(output));
+
+ vpx_usec_timer timer;
+ vpx_usec_timer_start(&timer);
+ for (int i = 0; i < times; ++i) {
+ h_func_(input, bwh_, output);  // Called directly, without ASM_REGISTER_STATE_CHECK, inside the timed loop.
+ }
+ vpx_usec_timer_mark(&timer);
+
+ const int elapsed_time = static_cast<int>(vpx_usec_timer_elapsed(&timer));  // Narrowed to int so it can be printed with %d.
+ printf("Hadamard%dx%d[%12d runs]: %d us\n", bwh_, bwh_, times,
+ elapsed_time);
+ }
+
+ protected:
+ int bwh_;  // Block width/height taken from the test parameter.
+ int block_size_;  // bwh_ * bwh_.
+ HadamardFunc h_func_;  // Function under test.
+ ACMRandom rnd_;  // Random source available to Rand() overrides.
+};
+
+class HadamardLowbdTest : public HadamardTestBase {  // Fixture variant whose input samples come from Rand9Signed().
+ protected:
+ virtual int16_t Rand() { return rnd_.Rand9Signed(); }
+};
+
+TEST_P(HadamardLowbdTest, CompareReferenceRandom) { CompareReferenceRandom(); }  // Forwards to the shared fixture check.
+
+TEST_P(HadamardLowbdTest, VaryStride) { VaryStride(); }  // Forwards to the shared fixture check.
+
+TEST_P(HadamardLowbdTest, DISABLED_Speed) {  // Timing run; the DISABLED_ name prefix keeps googletest from running it by default.
+ SpeedTest(10);
+ SpeedTest(10000);
+ SpeedTest(10000000);  // Three run counts, each printed on its own line by SpeedTest.
}
-INSTANTIATE_TEST_CASE_P(C, Hadamard8x8Test,
- ::testing::Values(&vpx_hadamard_8x8_c));
+INSTANTIATE_TEST_CASE_P(
+ C, HadamardLowbdTest,
+ ::testing::Values(HadamardFuncWithSize(&vpx_hadamard_8x8_c, 8),
+ HadamardFuncWithSize(&vpx_hadamard_16x16_c, 16),
+ HadamardFuncWithSize(&vpx_hadamard_32x32_c, 32)));  // C versions: all three block sizes, always instantiated.
#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(SSE2, Hadamard8x8Test,
- ::testing::Values(&vpx_hadamard_8x8_sse2));
+INSTANTIATE_TEST_CASE_P(
+ SSE2, HadamardLowbdTest,
+ ::testing::Values(HadamardFuncWithSize(&vpx_hadamard_8x8_sse2, 8),
+ HadamardFuncWithSize(&vpx_hadamard_16x16_sse2, 16),
+ HadamardFuncWithSize(&vpx_hadamard_32x32_sse2, 32)));
#endif // HAVE_SSE2
-#if HAVE_SSSE3 && ARCH_X86_64
-INSTANTIATE_TEST_CASE_P(SSSE3, Hadamard8x8Test,
- ::testing::Values(&vpx_hadamard_8x8_ssse3));
-#endif // HAVE_SSSE3 && ARCH_X86_64
+#if HAVE_AVX2
+INSTANTIATE_TEST_CASE_P(
+ AVX2, HadamardLowbdTest,
+ ::testing::Values(HadamardFuncWithSize(&vpx_hadamard_16x16_avx2, 16),
+ HadamardFuncWithSize(&vpx_hadamard_32x32_avx2, 32)));  // Only 16x16 and 32x32 are listed for AVX2 here.
+#endif // HAVE_AVX2
+
+#if HAVE_SSSE3 && VPX_ARCH_X86_64
+INSTANTIATE_TEST_CASE_P(
+ SSSE3, HadamardLowbdTest,
+ ::testing::Values(HadamardFuncWithSize(&vpx_hadamard_8x8_ssse3, 8)));  // Only 8x8 is listed for SSSE3, and only when VPX_ARCH_X86_64 is set.
+#endif // HAVE_SSSE3 && VPX_ARCH_X86_64
#if HAVE_NEON
-INSTANTIATE_TEST_CASE_P(NEON, Hadamard8x8Test,
- ::testing::Values(&vpx_hadamard_8x8_neon));
+INSTANTIATE_TEST_CASE_P(
+ NEON, HadamardLowbdTest,
+ ::testing::Values(HadamardFuncWithSize(&vpx_hadamard_8x8_neon, 8),
+ HadamardFuncWithSize(&vpx_hadamard_16x16_neon, 16)));  // No 32x32 entry is listed for NEON.
#endif // HAVE_NEON
// TODO(jingning): Remove highbitdepth flag when the SIMD functions are
// in place and turn on the unit test.
#if !CONFIG_VP9_HIGHBITDEPTH
#if HAVE_MSA
-INSTANTIATE_TEST_CASE_P(MSA, Hadamard8x8Test,
- ::testing::Values(&vpx_hadamard_8x8_msa));
+INSTANTIATE_TEST_CASE_P(
+ MSA, HadamardLowbdTest,
+ ::testing::Values(HadamardFuncWithSize(&vpx_hadamard_8x8_msa, 8),
+ HadamardFuncWithSize(&vpx_hadamard_16x16_msa, 16)));  // Compiled only when CONFIG_VP9_HIGHBITDEPTH is off (see the guard above).
#endif // HAVE_MSA
#endif // !CONFIG_VP9_HIGHBITDEPTH
-class Hadamard16x16Test : public HadamardTestBase {};
+#if HAVE_VSX
+INSTANTIATE_TEST_CASE_P(
+ VSX, HadamardLowbdTest,
+ ::testing::Values(HadamardFuncWithSize(&vpx_hadamard_8x8_vsx, 8),
+ HadamardFuncWithSize(&vpx_hadamard_16x16_vsx, 16)));  // No 32x32 entry is listed for VSX.
+#endif // HAVE_VSX
-TEST_P(Hadamard16x16Test, CompareReferenceRandom) {
- DECLARE_ALIGNED(16, int16_t, a[16 * 16]);
- DECLARE_ALIGNED(16, tran_low_t, b[16 * 16]);
- tran_low_t b_ref[16 * 16];
- for (int i = 0; i < 16 * 16; ++i) {
- a[i] = rnd_.Rand9Signed();
- }
- memset(b, 0, sizeof(b));
- memset(b_ref, 0, sizeof(b_ref));
-
- reference_hadamard16x16(a, 16, b_ref);
- ASM_REGISTER_STATE_CHECK(h_func_(a, 16, b));
-
- // The order of the output is not important. Sort before checking.
- std::sort(b, b + 16 * 16);
- std::sort(b_ref, b_ref + 16 * 16);
- EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b)));
-}
-
-TEST_P(Hadamard16x16Test, VaryStride) {
- DECLARE_ALIGNED(16, int16_t, a[16 * 16 * 8]);
- DECLARE_ALIGNED(16, tran_low_t, b[16 * 16]);
- tran_low_t b_ref[16 * 16];
- for (int i = 0; i < 16 * 16 * 8; ++i) {
- a[i] = rnd_.Rand9Signed();
- }
+#if CONFIG_VP9_HIGHBITDEPTH
+class HadamardHighbdTest : public HadamardTestBase {  // Fixture variant whose input samples come from Rand13Signed().
+ protected:
+ virtual int16_t Rand() { return rnd_.Rand13Signed(); }
+};
- for (int i = 8; i < 64; i += 8) {
- memset(b, 0, sizeof(b));
- memset(b_ref, 0, sizeof(b_ref));
+TEST_P(HadamardHighbdTest, CompareReferenceRandom) { CompareReferenceRandom(); }  // Forwards to the shared fixture check.
- reference_hadamard16x16(a, i, b_ref);
- ASM_REGISTER_STATE_CHECK(h_func_(a, i, b));
+TEST_P(HadamardHighbdTest, VaryStride) { VaryStride(); }  // Forwards to the shared fixture check.
- // The order of the output is not important. Sort before checking.
- std::sort(b, b + 16 * 16);
- std::sort(b_ref, b_ref + 16 * 16);
- EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b)));
- }
+TEST_P(HadamardHighbdTest, DISABLED_Speed) {  // Timing run; the DISABLED_ name prefix keeps googletest from running it by default.
+ SpeedTest(10);
+ SpeedTest(10000);
+ SpeedTest(10000000);  // Three run counts, each printed on its own line by SpeedTest.
}
-INSTANTIATE_TEST_CASE_P(C, Hadamard16x16Test,
- ::testing::Values(&vpx_hadamard_16x16_c));
-
-#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(SSE2, Hadamard16x16Test,
- ::testing::Values(&vpx_hadamard_16x16_sse2));
-#endif // HAVE_SSE2
-
-#if HAVE_NEON
-INSTANTIATE_TEST_CASE_P(NEON, Hadamard16x16Test,
- ::testing::Values(&vpx_hadamard_16x16_neon));
-#endif // HAVE_NEON
-
-#if !CONFIG_VP9_HIGHBITDEPTH
-#if HAVE_MSA
-INSTANTIATE_TEST_CASE_P(MSA, Hadamard16x16Test,
- ::testing::Values(&vpx_hadamard_16x16_msa));
-#endif // HAVE_MSA
-#endif // !CONFIG_VP9_HIGHBITDEPTH
+INSTANTIATE_TEST_CASE_P(
+ C, HadamardHighbdTest,
+ ::testing::Values(HadamardFuncWithSize(&vpx_highbd_hadamard_8x8_c, 8),
+ HadamardFuncWithSize(&vpx_highbd_hadamard_16x16_c, 16),
+ HadamardFuncWithSize(&vpx_highbd_hadamard_32x32_c, 32)));  // High-bitdepth C versions: all three block sizes.
+
+#if HAVE_AVX2
+INSTANTIATE_TEST_CASE_P(
+ AVX2, HadamardHighbdTest,
+ ::testing::Values(HadamardFuncWithSize(&vpx_highbd_hadamard_8x8_avx2, 8),
+ HadamardFuncWithSize(&vpx_highbd_hadamard_16x16_avx2, 16),
+ HadamardFuncWithSize(&vpx_highbd_hadamard_32x32_avx2,
+ 32)));  // Unlike the low-bitdepth AVX2 list, this one includes 8x8.
+#endif // HAVE_AVX2
+
+#endif // CONFIG_VP9_HIGHBITDEPTH
} // namespace