granicus.if.org Git - libvpx/blob - test/dct32x32_test.cc

   1 /*
   2  *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
   3  *
   4  *  Use of this source code is governed by a BSD-style license
   5  *  that can be found in the LICENSE file in the root of the source
   6  *  tree. An additional intellectual property rights grant can be found
   7  *  in the file PATENTS.  All contributing project authors may
   8  *  be found in the AUTHORS file in the root of the source tree.
   9  */
  10
  11 #include <math.h>
  12 #include <stdlib.h>
  13 #include <string.h>
  14
  15 #include "third_party/googletest/src/include/gtest/gtest.h"
  16
  17 #include "./vp9_rtcd.h"
  18 #include "./vpx_config.h"
  19 #include "./vpx_dsp_rtcd.h"
  20 #include "test/acm_random.h"
  21 #include "test/clear_system_state.h"
  22 #include "test/register_state_check.h"
  23 #include "test/util.h"
  24 #include "vp9/common/vp9_entropy.h"
  25 #include "vpx/vpx_codec.h"
  26 #include "vpx/vpx_integer.h"
  27 #include "vpx_ports/mem.h"
  28
  29 using libvpx_test::ACMRandom;
  30
  31 namespace {
  32 #ifdef _MSC_VER
  33 static int round(double x) {
  34   if (x < 0)
  35     return static_cast<int>(ceil(x - 0.5));
  36   else
  37     return static_cast<int>(floor(x + 0.5));
  38 }
  39 #endif
  40
  41 const int kNumCoeffs = 1024;
  42 const double kPi = 3.141592653589793238462643383279502884;
  43 void reference_32x32_dct_1d(const double in[32], double out[32]) {
  44   const double kInvSqrt2 = 0.707106781186547524400844362104;
  45   for (int k = 0; k < 32; k++) {
  46     out[k] = 0.0;
  47     for (int n = 0; n < 32; n++)
  48       out[k] += in[n] * cos(kPi * (2 * n + 1) * k / 64.0);
  49     if (k == 0)
  50       out[k] = out[k] * kInvSqrt2;
  51   }
  52 }
  53
  54 void reference_32x32_dct_2d(const int16_t input[kNumCoeffs],
  55                             double output[kNumCoeffs]) {
  56   // First transform columns
  57   for (int i = 0; i < 32; ++i) {
  58     double temp_in[32], temp_out[32];
  59     for (int j = 0; j < 32; ++j)
  60       temp_in[j] = input[j*32 + i];
  61     reference_32x32_dct_1d(temp_in, temp_out);
  62     for (int j = 0; j < 32; ++j)
  63       output[j * 32 + i] = temp_out[j];
  64   }
  65   // Then transform rows
  66   for (int i = 0; i < 32; ++i) {
  67     double temp_in[32], temp_out[32];
  68     for (int j = 0; j < 32; ++j)
  69       temp_in[j] = output[j + i*32];
  70     reference_32x32_dct_1d(temp_in, temp_out);
  71     // Scale by some magic number
  72     for (int j = 0; j < 32; ++j)
  73       output[j + i * 32] = temp_out[j] / 4;
  74   }
  75 }
  76
  77 typedef void (*FwdTxfmFunc)(const int16_t *in, tran_low_t *out, int stride);
  78 typedef void (*InvTxfmFunc)(const tran_low_t *in, uint8_t *out, int stride);
  79
  80 typedef std::tr1::tuple<FwdTxfmFunc, InvTxfmFunc, int, vpx_bit_depth_t>
  81     Trans32x32Param;
  82
  83 #if CONFIG_VP9_HIGHBITDEPTH
  84 void idct32x32_8(const tran_low_t *in, uint8_t *out, int stride) {
  85   vpx_highbd_idct32x32_1024_add_c(in, out, stride, 8);
  86 }
  87
  88 void idct32x32_10(const tran_low_t *in, uint8_t *out, int stride) {
  89   vpx_highbd_idct32x32_1024_add_c(in, out, stride, 10);
  90 }
  91
  92 void idct32x32_12(const tran_low_t *in, uint8_t *out, int stride) {
  93   vpx_highbd_idct32x32_1024_add_c(in, out, stride, 12);
  94 }
  95 #endif  // CONFIG_VP9_HIGHBITDEPTH
  96
  97 class Trans32x32Test : public ::testing::TestWithParam<Trans32x32Param> {
  98  public:
  99   virtual ~Trans32x32Test() {}
 100   virtual void SetUp() {
 101     fwd_txfm_ = GET_PARAM(0);
 102     inv_txfm_ = GET_PARAM(1);
 103     version_  = GET_PARAM(2);  // 0: high precision forward transform
 104                                // 1: low precision version for rd loop
 105     bit_depth_ = GET_PARAM(3);
 106     mask_ = (1 << bit_depth_) - 1;
 107   }
 108
 109   virtual void TearDown() { libvpx_test::ClearSystemState(); }
 110
 111  protected:
 112   int version_;
 113   vpx_bit_depth_t bit_depth_;
 114   int mask_;
 115   FwdTxfmFunc fwd_txfm_;
 116   InvTxfmFunc inv_txfm_;
 117 };
 118
 119 TEST_P(Trans32x32Test, AccuracyCheck) {
 120   ACMRandom rnd(ACMRandom::DeterministicSeed());
 121   uint32_t max_error = 0;
 122   int64_t total_error = 0;
 123   const int count_test_block = 10000;
 124   DECLARE_ALIGNED(16, int16_t, test_input_block[kNumCoeffs]);
 125   DECLARE_ALIGNED(16, tran_low_t, test_temp_block[kNumCoeffs]);
 126   DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
 127   DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
 128 #if CONFIG_VP9_HIGHBITDEPTH
 129   DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
 130   DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
 131 #endif
 132
 133   for (int i = 0; i < count_test_block; ++i) {
 134     // Initialize a test block with input range [-mask_, mask_].
 135     for (int j = 0; j < kNumCoeffs; ++j) {
 136       if (bit_depth_ == VPX_BITS_8) {
 137         src[j] = rnd.Rand8();
 138         dst[j] = rnd.Rand8();
 139         test_input_block[j] = src[j] - dst[j];
 140 #if CONFIG_VP9_HIGHBITDEPTH
 141       } else {
 142         src16[j] = rnd.Rand16() & mask_;
 143         dst16[j] = rnd.Rand16() & mask_;
 144         test_input_block[j] = src16[j] - dst16[j];
 145 #endif
 146       }
 147     }
 148
 149     ASM_REGISTER_STATE_CHECK(fwd_txfm_(test_input_block, test_temp_block, 32));
 150     if (bit_depth_ == VPX_BITS_8) {
 151       ASM_REGISTER_STATE_CHECK(inv_txfm_(test_temp_block, dst, 32));
 152 #if CONFIG_VP9_HIGHBITDEPTH
 153     } else {
 154       ASM_REGISTER_STATE_CHECK(inv_txfm_(test_temp_block,
 155                                          CONVERT_TO_BYTEPTR(dst16), 32));
 156 #endif
 157     }
 158
 159     for (int j = 0; j < kNumCoeffs; ++j) {
 160 #if CONFIG_VP9_HIGHBITDEPTH
 161       const uint32_t diff =
 162           bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
 163 #else
 164       const uint32_t diff = dst[j] - src[j];
 165 #endif
 166       const uint32_t error = diff * diff;
 167       if (max_error < error)
 168         max_error = error;
 169       total_error += error;
 170     }
 171   }
 172
 173   if (version_ == 1) {
 174     max_error /= 2;
 175     total_error /= 45;
 176   }
 177
 178   EXPECT_GE(1u << 2 * (bit_depth_ - 8), max_error)
 179       << "Error: 32x32 FDCT/IDCT has an individual round-trip error > 1";
 180
 181   EXPECT_GE(count_test_block << 2 * (bit_depth_ - 8), total_error)
 182       << "Error: 32x32 FDCT/IDCT has average round-trip error > 1 per block";
 183 }
 184
 185 TEST_P(Trans32x32Test, CoeffCheck) {
 186   ACMRandom rnd(ACMRandom::DeterministicSeed());
 187   const int count_test_block = 1000;
 188
 189   DECLARE_ALIGNED(16, int16_t, input_block[kNumCoeffs]);
 190   DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
 191   DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);
 192
 193   for (int i = 0; i < count_test_block; ++i) {
 194     for (int j = 0; j < kNumCoeffs; ++j)
 195       input_block[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
 196
 197     const int stride = 32;
 198     vpx_fdct32x32_c(input_block, output_ref_block, stride);
 199     ASM_REGISTER_STATE_CHECK(fwd_txfm_(input_block, output_block, stride));
 200
 201     if (version_ == 0) {
 202       for (int j = 0; j < kNumCoeffs; ++j)
 203         EXPECT_EQ(output_block[j], output_ref_block[j])
 204             << "Error: 32x32 FDCT versions have mismatched coefficients";
 205     } else {
 206       for (int j = 0; j < kNumCoeffs; ++j)
 207         EXPECT_GE(6, abs(output_block[j] - output_ref_block[j]))
 208             << "Error: 32x32 FDCT rd has mismatched coefficients";
 209     }
 210   }
 211 }
 212
 213 TEST_P(Trans32x32Test, MemCheck) {
 214   ACMRandom rnd(ACMRandom::DeterministicSeed());
 215   const int count_test_block = 2000;
 216
 217   DECLARE_ALIGNED(16, int16_t, input_extreme_block[kNumCoeffs]);
 218   DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
 219   DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);
 220
 221   for (int i = 0; i < count_test_block; ++i) {
 222     // Initialize a test block with input range [-mask_, mask_].
 223     for (int j = 0; j < kNumCoeffs; ++j) {
 224       input_extreme_block[j] = rnd.Rand8() & 1 ? mask_ : -mask_;
 225     }
 226     if (i == 0) {
 227       for (int j = 0; j < kNumCoeffs; ++j)
 228         input_extreme_block[j] = mask_;
 229     } else if (i == 1) {
 230       for (int j = 0; j < kNumCoeffs; ++j)
 231         input_extreme_block[j] = -mask_;
 232     }
 233
 234     const int stride = 32;
 235     vpx_fdct32x32_c(input_extreme_block, output_ref_block, stride);
 236     ASM_REGISTER_STATE_CHECK(
 237         fwd_txfm_(input_extreme_block, output_block, stride));
 238
 239     // The minimum quant value is 4.
 240     for (int j = 0; j < kNumCoeffs; ++j) {
 241       if (version_ == 0) {
 242         EXPECT_EQ(output_block[j], output_ref_block[j])
 243             << "Error: 32x32 FDCT versions have mismatched coefficients";
 244       } else {
 245         EXPECT_GE(6, abs(output_block[j] - output_ref_block[j]))
 246             << "Error: 32x32 FDCT rd has mismatched coefficients";
 247       }
 248       EXPECT_GE(4 * DCT_MAX_VALUE << (bit_depth_ - 8), abs(output_ref_block[j]))
 249           << "Error: 32x32 FDCT C has coefficient larger than 4*DCT_MAX_VALUE";
 250       EXPECT_GE(4 * DCT_MAX_VALUE << (bit_depth_ - 8), abs(output_block[j]))
 251           << "Error: 32x32 FDCT has coefficient larger than "
 252           << "4*DCT_MAX_VALUE";
 253     }
 254   }
 255 }
 256
 257 TEST_P(Trans32x32Test, InverseAccuracy) {
 258   ACMRandom rnd(ACMRandom::DeterministicSeed());
 259   const int count_test_block = 1000;
 260   DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]);
 261   DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
 262   DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
 263   DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
 264 #if CONFIG_VP9_HIGHBITDEPTH
 265   DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
 266   DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
 267 #endif
 268
 269   for (int i = 0; i < count_test_block; ++i) {
 270     double out_r[kNumCoeffs];
 271
 272     // Initialize a test block with input range [-255, 255]
 273     for (int j = 0; j < kNumCoeffs; ++j) {
 274       if (bit_depth_ == VPX_BITS_8) {
 275         src[j] = rnd.Rand8();
 276         dst[j] = rnd.Rand8();
 277         in[j] = src[j] - dst[j];
 278 #if CONFIG_VP9_HIGHBITDEPTH
 279       } else {
 280         src16[j] = rnd.Rand16() & mask_;
 281         dst16[j] = rnd.Rand16() & mask_;
 282         in[j] = src16[j] - dst16[j];
 283 #endif
 284       }
 285     }
 286
 287     reference_32x32_dct_2d(in, out_r);
 288     for (int j = 0; j < kNumCoeffs; ++j)
 289       coeff[j] = static_cast<tran_low_t>(round(out_r[j]));
 290     if (bit_depth_ == VPX_BITS_8) {
 291       ASM_REGISTER_STATE_CHECK(inv_txfm_(coeff, dst, 32));
 292 #if CONFIG_VP9_HIGHBITDEPTH
 293     } else {
 294       ASM_REGISTER_STATE_CHECK(inv_txfm_(coeff, CONVERT_TO_BYTEPTR(dst16), 32));
 295 #endif
 296     }
 297     for (int j = 0; j < kNumCoeffs; ++j) {
 298 #if CONFIG_VP9_HIGHBITDEPTH
 299       const int diff =
 300           bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
 301 #else
 302       const int diff = dst[j] - src[j];
 303 #endif
 304       const int error = diff * diff;
 305       EXPECT_GE(1, error)
 306           << "Error: 32x32 IDCT has error " << error
 307           << " at index " << j;
 308     }
 309   }
 310 }
 311
 312 using std::tr1::make_tuple;
 313
 314 #if CONFIG_VP9_HIGHBITDEPTH
 315 INSTANTIATE_TEST_CASE_P(
 316     C, Trans32x32Test,
 317     ::testing::Values(
 318         make_tuple(&vpx_highbd_fdct32x32_c,
 319                    &idct32x32_10, 0, VPX_BITS_10),
 320         make_tuple(&vpx_highbd_fdct32x32_rd_c,
 321                    &idct32x32_10, 1, VPX_BITS_10),
 322         make_tuple(&vpx_highbd_fdct32x32_c,
 323                    &idct32x32_12, 0, VPX_BITS_12),
 324         make_tuple(&vpx_highbd_fdct32x32_rd_c,
 325                    &idct32x32_12, 1, VPX_BITS_12),
 326         make_tuple(&vpx_fdct32x32_c,
 327                    &vpx_idct32x32_1024_add_c, 0, VPX_BITS_8),
 328         make_tuple(&vpx_fdct32x32_rd_c,
 329                    &vpx_idct32x32_1024_add_c, 1, VPX_BITS_8)));
 330 #else
 331 INSTANTIATE_TEST_CASE_P(
 332     C, Trans32x32Test,
 333     ::testing::Values(
 334         make_tuple(&vpx_fdct32x32_c,
 335                    &vpx_idct32x32_1024_add_c, 0, VPX_BITS_8),
 336         make_tuple(&vpx_fdct32x32_rd_c,
 337                    &vpx_idct32x32_1024_add_c, 1, VPX_BITS_8)));
 338 #endif  // CONFIG_VP9_HIGHBITDEPTH
 339
 340 #if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 341 INSTANTIATE_TEST_CASE_P(
 342     NEON, Trans32x32Test,
 343     ::testing::Values(
 344         make_tuple(&vpx_fdct32x32_c,
 345                    &vpx_idct32x32_1024_add_neon, 0, VPX_BITS_8),
 346         make_tuple(&vpx_fdct32x32_rd_c,
 347                    &vpx_idct32x32_1024_add_neon, 1, VPX_BITS_8)));
 348 #endif  // HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 349
 350 #if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 351 INSTANTIATE_TEST_CASE_P(
 352     SSE2, Trans32x32Test,
 353     ::testing::Values(
 354         make_tuple(&vpx_fdct32x32_sse2,
 355                    &vpx_idct32x32_1024_add_sse2, 0, VPX_BITS_8),
 356         make_tuple(&vpx_fdct32x32_rd_sse2,
 357                    &vpx_idct32x32_1024_add_sse2, 1, VPX_BITS_8)));
 358 #endif  // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 359
 360 #if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 361 INSTANTIATE_TEST_CASE_P(
 362     SSE2, Trans32x32Test,
 363     ::testing::Values(
 364         make_tuple(&vpx_highbd_fdct32x32_sse2, &idct32x32_10, 0, VPX_BITS_10),
 365         make_tuple(&vpx_highbd_fdct32x32_rd_sse2, &idct32x32_10, 1,
 366                    VPX_BITS_10),
 367         make_tuple(&vpx_highbd_fdct32x32_sse2, &idct32x32_12, 0, VPX_BITS_12),
 368         make_tuple(&vpx_highbd_fdct32x32_rd_sse2, &idct32x32_12, 1,
 369                    VPX_BITS_12),
 370         make_tuple(&vpx_fdct32x32_sse2, &vpx_idct32x32_1024_add_c, 0,
 371                    VPX_BITS_8),
 372         make_tuple(&vpx_fdct32x32_rd_sse2, &vpx_idct32x32_1024_add_c, 1,
 373                    VPX_BITS_8)));
 374 #endif  // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 375
 376 #if HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 377 INSTANTIATE_TEST_CASE_P(
 378     AVX2, Trans32x32Test,
 379     ::testing::Values(
 380         make_tuple(&vpx_fdct32x32_avx2,
 381                    &vpx_idct32x32_1024_add_sse2, 0, VPX_BITS_8),
 382         make_tuple(&vpx_fdct32x32_rd_avx2,
 383                    &vpx_idct32x32_1024_add_sse2, 1, VPX_BITS_8)));
 384 #endif  // HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 385
 386 #if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 387 INSTANTIATE_TEST_CASE_P(
 388     MSA, Trans32x32Test,
 389     ::testing::Values(
 390         make_tuple(&vpx_fdct32x32_msa,
 391                    &vpx_idct32x32_1024_add_msa, 0, VPX_BITS_8),
 392         make_tuple(&vpx_fdct32x32_rd_msa,
 393                    &vpx_idct32x32_1024_add_msa, 1, VPX_BITS_8)));
 394 #endif  // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 395 }  // namespace