granicus.if.org Git - libvpx/blob - test/dct32x32_test.cc

   1 /*
   2  *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
   3  *
   4  *  Use of this source code is governed by a BSD-style license
   5  *  that can be found in the LICENSE file in the root of the source
   6  *  tree. An additional intellectual property rights grant can be found
   7  *  in the file PATENTS.  All contributing project authors may
   8  *  be found in the AUTHORS file in the root of the source tree.
   9  */
  10
  11 #include <math.h>
  12 #include <stdlib.h>
  13 #include <string.h>
  14
  15 #include "third_party/googletest/src/include/gtest/gtest.h"
  16
  17 #include "./vp9_rtcd.h"
  18 #include "./vpx_config.h"
  19 #include "./vpx_dsp_rtcd.h"
  20 #include "test/acm_random.h"
  21 #include "test/clear_system_state.h"
  22 #include "test/register_state_check.h"
  23 #include "test/util.h"
  24 #include "vp9/common/vp9_entropy.h"
  25 #include "vpx/vpx_codec.h"
  26 #include "vpx/vpx_integer.h"
  27 #include "vpx_ports/mem.h"
  28 #include "vpx_ports/msvc.h"  // for round()
  29
  30 using libvpx_test::ACMRandom;
  31
  32 namespace {
  33
  34 const int kNumCoeffs = 1024;
  35 const double kPi = 3.141592653589793238462643383279502884;
  36 void reference_32x32_dct_1d(const double in[32], double out[32]) {
  37   const double kInvSqrt2 = 0.707106781186547524400844362104;
  38   for (int k = 0; k < 32; k++) {
  39     out[k] = 0.0;
  40     for (int n = 0; n < 32; n++) {
  41       out[k] += in[n] * cos(kPi * (2 * n + 1) * k / 64.0);
  42     }
  43     if (k == 0) out[k] = out[k] * kInvSqrt2;
  44   }
  45 }
  46
  47 void reference_32x32_dct_2d(const int16_t input[kNumCoeffs],
  48                             double output[kNumCoeffs]) {
  49   // First transform columns
  50   for (int i = 0; i < 32; ++i) {
  51     double temp_in[32], temp_out[32];
  52     for (int j = 0; j < 32; ++j) temp_in[j] = input[j * 32 + i];
  53     reference_32x32_dct_1d(temp_in, temp_out);
  54     for (int j = 0; j < 32; ++j) output[j * 32 + i] = temp_out[j];
  55   }
  56   // Then transform rows
  57   for (int i = 0; i < 32; ++i) {
  58     double temp_in[32], temp_out[32];
  59     for (int j = 0; j < 32; ++j) temp_in[j] = output[j + i * 32];
  60     reference_32x32_dct_1d(temp_in, temp_out);
  61     // Scale by some magic number
  62     for (int j = 0; j < 32; ++j) output[j + i * 32] = temp_out[j] / 4;
  63   }
  64 }
  65
  66 typedef void (*FwdTxfmFunc)(const int16_t *in, tran_low_t *out, int stride);
  67 typedef void (*InvTxfmFunc)(const tran_low_t *in, uint8_t *out, int stride);
  68
  69 typedef std::tr1::tuple<FwdTxfmFunc, InvTxfmFunc, int, vpx_bit_depth_t>
  70     Trans32x32Param;
  71
  72 #if CONFIG_VP9_HIGHBITDEPTH
  73 void idct32x32_10(const tran_low_t *in, uint8_t *out, int stride) {
  74   vpx_highbd_idct32x32_1024_add_c(in, out, stride, 10);
  75 }
  76
  77 void idct32x32_12(const tran_low_t *in, uint8_t *out, int stride) {
  78   vpx_highbd_idct32x32_1024_add_c(in, out, stride, 12);
  79 }
  80 #endif  // CONFIG_VP9_HIGHBITDEPTH
  81
  82 class Trans32x32Test : public ::testing::TestWithParam<Trans32x32Param> {
  83  public:
  84   virtual ~Trans32x32Test() {}
  85   virtual void SetUp() {
  86     fwd_txfm_ = GET_PARAM(0);
  87     inv_txfm_ = GET_PARAM(1);
  88     version_ = GET_PARAM(2);  // 0: high precision forward transform
  89                               // 1: low precision version for rd loop
  90     bit_depth_ = GET_PARAM(3);
  91     mask_ = (1 << bit_depth_) - 1;
  92   }
  93
  94   virtual void TearDown() { libvpx_test::ClearSystemState(); }
  95
  96  protected:
  97   int version_;
  98   vpx_bit_depth_t bit_depth_;
  99   int mask_;
 100   FwdTxfmFunc fwd_txfm_;
 101   InvTxfmFunc inv_txfm_;
 102 };
 103
 104 TEST_P(Trans32x32Test, AccuracyCheck) {
 105   ACMRandom rnd(ACMRandom::DeterministicSeed());
 106   uint32_t max_error = 0;
 107   int64_t total_error = 0;
 108   const int count_test_block = 10000;
 109   DECLARE_ALIGNED(16, int16_t, test_input_block[kNumCoeffs]);
 110   DECLARE_ALIGNED(16, tran_low_t, test_temp_block[kNumCoeffs]);
 111   DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
 112   DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
 113 #if CONFIG_VP9_HIGHBITDEPTH
 114   DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
 115   DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
 116 #endif
 117
 118   for (int i = 0; i < count_test_block; ++i) {
 119     // Initialize a test block with input range [-mask_, mask_].
 120     for (int j = 0; j < kNumCoeffs; ++j) {
 121       if (bit_depth_ == VPX_BITS_8) {
 122         src[j] = rnd.Rand8();
 123         dst[j] = rnd.Rand8();
 124         test_input_block[j] = src[j] - dst[j];
 125 #if CONFIG_VP9_HIGHBITDEPTH
 126       } else {
 127         src16[j] = rnd.Rand16() & mask_;
 128         dst16[j] = rnd.Rand16() & mask_;
 129         test_input_block[j] = src16[j] - dst16[j];
 130 #endif
 131       }
 132     }
 133
 134     ASM_REGISTER_STATE_CHECK(fwd_txfm_(test_input_block, test_temp_block, 32));
 135     if (bit_depth_ == VPX_BITS_8) {
 136       ASM_REGISTER_STATE_CHECK(inv_txfm_(test_temp_block, dst, 32));
 137 #if CONFIG_VP9_HIGHBITDEPTH
 138     } else {
 139       ASM_REGISTER_STATE_CHECK(
 140           inv_txfm_(test_temp_block, CONVERT_TO_BYTEPTR(dst16), 32));
 141 #endif
 142     }
 143
 144     for (int j = 0; j < kNumCoeffs; ++j) {
 145 #if CONFIG_VP9_HIGHBITDEPTH
 146       const int32_t diff =
 147           bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
 148 #else
 149       const int32_t diff = dst[j] - src[j];
 150 #endif
 151       const uint32_t error = diff * diff;
 152       if (max_error < error) max_error = error;
 153       total_error += error;
 154     }
 155   }
 156
 157   if (version_ == 1) {
 158     max_error /= 2;
 159     total_error /= 45;
 160   }
 161
 162   EXPECT_GE(1u << 2 * (bit_depth_ - 8), max_error)
 163       << "Error: 32x32 FDCT/IDCT has an individual round-trip error > 1";
 164
 165   EXPECT_GE(count_test_block << 2 * (bit_depth_ - 8), total_error)
 166       << "Error: 32x32 FDCT/IDCT has average round-trip error > 1 per block";
 167 }
 168
 169 TEST_P(Trans32x32Test, CoeffCheck) {
 170   ACMRandom rnd(ACMRandom::DeterministicSeed());
 171   const int count_test_block = 1000;
 172
 173   DECLARE_ALIGNED(16, int16_t, input_block[kNumCoeffs]);
 174   DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
 175   DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);
 176
 177   for (int i = 0; i < count_test_block; ++i) {
 178     for (int j = 0; j < kNumCoeffs; ++j) {
 179       input_block[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
 180     }
 181
 182     const int stride = 32;
 183     vpx_fdct32x32_c(input_block, output_ref_block, stride);
 184     ASM_REGISTER_STATE_CHECK(fwd_txfm_(input_block, output_block, stride));
 185
 186     if (version_ == 0) {
 187       for (int j = 0; j < kNumCoeffs; ++j)
 188         EXPECT_EQ(output_block[j], output_ref_block[j])
 189             << "Error: 32x32 FDCT versions have mismatched coefficients";
 190     } else {
 191       for (int j = 0; j < kNumCoeffs; ++j)
 192         EXPECT_GE(6, abs(output_block[j] - output_ref_block[j]))
 193             << "Error: 32x32 FDCT rd has mismatched coefficients";
 194     }
 195   }
 196 }
 197
 198 TEST_P(Trans32x32Test, MemCheck) {
 199   ACMRandom rnd(ACMRandom::DeterministicSeed());
 200   const int count_test_block = 2000;
 201
 202   DECLARE_ALIGNED(16, int16_t, input_extreme_block[kNumCoeffs]);
 203   DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
 204   DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);
 205
 206   for (int i = 0; i < count_test_block; ++i) {
 207     // Initialize a test block with input range [-mask_, mask_].
 208     for (int j = 0; j < kNumCoeffs; ++j) {
 209       input_extreme_block[j] = rnd.Rand8() & 1 ? mask_ : -mask_;
 210     }
 211     if (i == 0) {
 212       for (int j = 0; j < kNumCoeffs; ++j) input_extreme_block[j] = mask_;
 213     } else if (i == 1) {
 214       for (int j = 0; j < kNumCoeffs; ++j) input_extreme_block[j] = -mask_;
 215     }
 216
 217     const int stride = 32;
 218     vpx_fdct32x32_c(input_extreme_block, output_ref_block, stride);
 219     ASM_REGISTER_STATE_CHECK(
 220         fwd_txfm_(input_extreme_block, output_block, stride));
 221
 222     // The minimum quant value is 4.
 223     for (int j = 0; j < kNumCoeffs; ++j) {
 224       if (version_ == 0) {
 225         EXPECT_EQ(output_block[j], output_ref_block[j])
 226             << "Error: 32x32 FDCT versions have mismatched coefficients";
 227       } else {
 228         EXPECT_GE(6, abs(output_block[j] - output_ref_block[j]))
 229             << "Error: 32x32 FDCT rd has mismatched coefficients";
 230       }
 231       EXPECT_GE(4 * DCT_MAX_VALUE << (bit_depth_ - 8), abs(output_ref_block[j]))
 232           << "Error: 32x32 FDCT C has coefficient larger than 4*DCT_MAX_VALUE";
 233       EXPECT_GE(4 * DCT_MAX_VALUE << (bit_depth_ - 8), abs(output_block[j]))
 234           << "Error: 32x32 FDCT has coefficient larger than "
 235           << "4*DCT_MAX_VALUE";
 236     }
 237   }
 238 }
 239
 240 TEST_P(Trans32x32Test, InverseAccuracy) {
 241   ACMRandom rnd(ACMRandom::DeterministicSeed());
 242   const int count_test_block = 1000;
 243   DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]);
 244   DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
 245   DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
 246   DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
 247 #if CONFIG_VP9_HIGHBITDEPTH
 248   DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
 249   DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
 250 #endif
 251
 252   for (int i = 0; i < count_test_block; ++i) {
 253     double out_r[kNumCoeffs];
 254
 255     // Initialize a test block with input range [-255, 255]
 256     for (int j = 0; j < kNumCoeffs; ++j) {
 257       if (bit_depth_ == VPX_BITS_8) {
 258         src[j] = rnd.Rand8();
 259         dst[j] = rnd.Rand8();
 260         in[j] = src[j] - dst[j];
 261 #if CONFIG_VP9_HIGHBITDEPTH
 262       } else {
 263         src16[j] = rnd.Rand16() & mask_;
 264         dst16[j] = rnd.Rand16() & mask_;
 265         in[j] = src16[j] - dst16[j];
 266 #endif
 267       }
 268     }
 269
 270     reference_32x32_dct_2d(in, out_r);
 271     for (int j = 0; j < kNumCoeffs; ++j) {
 272       coeff[j] = static_cast<tran_low_t>(round(out_r[j]));
 273     }
 274     if (bit_depth_ == VPX_BITS_8) {
 275       ASM_REGISTER_STATE_CHECK(inv_txfm_(coeff, dst, 32));
 276 #if CONFIG_VP9_HIGHBITDEPTH
 277     } else {
 278       ASM_REGISTER_STATE_CHECK(inv_txfm_(coeff, CONVERT_TO_BYTEPTR(dst16), 32));
 279 #endif
 280     }
 281     for (int j = 0; j < kNumCoeffs; ++j) {
 282 #if CONFIG_VP9_HIGHBITDEPTH
 283       const int diff =
 284           bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
 285 #else
 286       const int diff = dst[j] - src[j];
 287 #endif
 288       const int error = diff * diff;
 289       EXPECT_GE(1, error) << "Error: 32x32 IDCT has error " << error
 290                           << " at index " << j;
 291     }
 292   }
 293 }
 294
 295 class PartialTrans32x32Test
 296     : public ::testing::TestWithParam<
 297           std::tr1::tuple<FwdTxfmFunc, vpx_bit_depth_t> > {
 298  public:
 299   virtual ~PartialTrans32x32Test() {}
 300   virtual void SetUp() {
 301     fwd_txfm_ = GET_PARAM(0);
 302     bit_depth_ = GET_PARAM(1);
 303   }
 304
 305   virtual void TearDown() { libvpx_test::ClearSystemState(); }
 306
 307  protected:
 308   vpx_bit_depth_t bit_depth_;
 309   FwdTxfmFunc fwd_txfm_;
 310 };
 311
 312 TEST_P(PartialTrans32x32Test, Extremes) {
 313 #if CONFIG_VP9_HIGHBITDEPTH
 314   const int16_t maxval =
 315       static_cast<int16_t>(clip_pixel_highbd(1 << 30, bit_depth_));
 316 #else
 317   const int16_t maxval = 255;
 318 #endif
 319   const int minval = -maxval;
 320   DECLARE_ALIGNED(16, int16_t, input[kNumCoeffs]);
 321   DECLARE_ALIGNED(16, tran_low_t, output[kNumCoeffs]);
 322
 323   for (int i = 0; i < kNumCoeffs; ++i) input[i] = maxval;
 324   output[0] = 0;
 325   ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 32));
 326   EXPECT_EQ((maxval * kNumCoeffs) >> 3, output[0]);
 327
 328   for (int i = 0; i < kNumCoeffs; ++i) input[i] = minval;
 329   output[0] = 0;
 330   ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 32));
 331   EXPECT_EQ((minval * kNumCoeffs) >> 3, output[0]);
 332 }
 333
 334 TEST_P(PartialTrans32x32Test, Random) {
 335 #if CONFIG_VP9_HIGHBITDEPTH
 336   const int16_t maxval =
 337       static_cast<int16_t>(clip_pixel_highbd(1 << 30, bit_depth_));
 338 #else
 339   const int16_t maxval = 255;
 340 #endif
 341   DECLARE_ALIGNED(16, int16_t, input[kNumCoeffs]);
 342   DECLARE_ALIGNED(16, tran_low_t, output[kNumCoeffs]);
 343   ACMRandom rnd(ACMRandom::DeterministicSeed());
 344
 345   int sum = 0;
 346   for (int i = 0; i < kNumCoeffs; ++i) {
 347     const int val = (i & 1) ? -rnd(maxval + 1) : rnd(maxval + 1);
 348     input[i] = val;
 349     sum += val;
 350   }
 351   output[0] = 0;
 352   ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 32));
 353   EXPECT_EQ(sum >> 3, output[0]);
 354 }
 355
 356 using std::tr1::make_tuple;
 357
 358 #if CONFIG_VP9_HIGHBITDEPTH
 359 INSTANTIATE_TEST_CASE_P(
 360     C, Trans32x32Test,
 361     ::testing::Values(
 362         make_tuple(&vpx_highbd_fdct32x32_c, &idct32x32_10, 0, VPX_BITS_10),
 363         make_tuple(&vpx_highbd_fdct32x32_rd_c, &idct32x32_10, 1, VPX_BITS_10),
 364         make_tuple(&vpx_highbd_fdct32x32_c, &idct32x32_12, 0, VPX_BITS_12),
 365         make_tuple(&vpx_highbd_fdct32x32_rd_c, &idct32x32_12, 1, VPX_BITS_12),
 366         make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_c, 0, VPX_BITS_8),
 367         make_tuple(&vpx_fdct32x32_rd_c, &vpx_idct32x32_1024_add_c, 1,
 368                    VPX_BITS_8)));
 369 INSTANTIATE_TEST_CASE_P(
 370     C, PartialTrans32x32Test,
 371     ::testing::Values(make_tuple(&vpx_highbd_fdct32x32_1_c, VPX_BITS_8),
 372                       make_tuple(&vpx_highbd_fdct32x32_1_c, VPX_BITS_10),
 373                       make_tuple(&vpx_highbd_fdct32x32_1_c, VPX_BITS_12)));
 374 #else
 375 INSTANTIATE_TEST_CASE_P(
 376     C, Trans32x32Test,
 377     ::testing::Values(make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_c, 0,
 378                                  VPX_BITS_8),
 379                       make_tuple(&vpx_fdct32x32_rd_c, &vpx_idct32x32_1024_add_c,
 380                                  1, VPX_BITS_8)));
 381 INSTANTIATE_TEST_CASE_P(C, PartialTrans32x32Test,
 382                         ::testing::Values(make_tuple(&vpx_fdct32x32_1_c,
 383                                                      VPX_BITS_8)));
 384 #endif  // CONFIG_VP9_HIGHBITDEPTH
 385
 386 #if HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 387 INSTANTIATE_TEST_CASE_P(
 388     NEON, Trans32x32Test,
 389     ::testing::Values(make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_neon,
 390                                  0, VPX_BITS_8),
 391                       make_tuple(&vpx_fdct32x32_rd_c,
 392                                  &vpx_idct32x32_1024_add_neon, 1, VPX_BITS_8)));
 393 #endif  // HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 394
 395 #if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 396 INSTANTIATE_TEST_CASE_P(
 397     SSE2, Trans32x32Test,
 398     ::testing::Values(make_tuple(&vpx_fdct32x32_sse2,
 399                                  &vpx_idct32x32_1024_add_sse2, 0, VPX_BITS_8),
 400                       make_tuple(&vpx_fdct32x32_rd_sse2,
 401                                  &vpx_idct32x32_1024_add_sse2, 1, VPX_BITS_8)));
 402 INSTANTIATE_TEST_CASE_P(SSE2, PartialTrans32x32Test,
 403                         ::testing::Values(make_tuple(&vpx_fdct32x32_1_sse2,
 404                                                      VPX_BITS_8)));
 405 #endif  // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 406
 407 #if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 408 INSTANTIATE_TEST_CASE_P(
 409     SSE2, Trans32x32Test,
 410     ::testing::Values(
 411         make_tuple(&vpx_highbd_fdct32x32_sse2, &idct32x32_10, 0, VPX_BITS_10),
 412         make_tuple(&vpx_highbd_fdct32x32_rd_sse2, &idct32x32_10, 1,
 413                    VPX_BITS_10),
 414         make_tuple(&vpx_highbd_fdct32x32_sse2, &idct32x32_12, 0, VPX_BITS_12),
 415         make_tuple(&vpx_highbd_fdct32x32_rd_sse2, &idct32x32_12, 1,
 416                    VPX_BITS_12),
 417         make_tuple(&vpx_fdct32x32_sse2, &vpx_idct32x32_1024_add_c, 0,
 418                    VPX_BITS_8),
 419         make_tuple(&vpx_fdct32x32_rd_sse2, &vpx_idct32x32_1024_add_c, 1,
 420                    VPX_BITS_8)));
 421 INSTANTIATE_TEST_CASE_P(SSE2, PartialTrans32x32Test,
 422                         ::testing::Values(make_tuple(&vpx_fdct32x32_1_sse2,
 423                                                      VPX_BITS_8)));
 424 #endif  // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 425
 426 #if HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 427 INSTANTIATE_TEST_CASE_P(
 428     AVX2, Trans32x32Test,
 429     ::testing::Values(make_tuple(&vpx_fdct32x32_avx2,
 430                                  &vpx_idct32x32_1024_add_sse2, 0, VPX_BITS_8),
 431                       make_tuple(&vpx_fdct32x32_rd_avx2,
 432                                  &vpx_idct32x32_1024_add_sse2, 1, VPX_BITS_8)));
 433 #endif  // HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 434
 435 #if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 436 INSTANTIATE_TEST_CASE_P(
 437     MSA, Trans32x32Test,
 438     ::testing::Values(make_tuple(&vpx_fdct32x32_msa,
 439                                  &vpx_idct32x32_1024_add_msa, 0, VPX_BITS_8),
 440                       make_tuple(&vpx_fdct32x32_rd_msa,
 441                                  &vpx_idct32x32_1024_add_msa, 1, VPX_BITS_8)));
 442 INSTANTIATE_TEST_CASE_P(MSA, PartialTrans32x32Test,
 443                         ::testing::Values(make_tuple(&vpx_fdct32x32_1_msa,
 444                                                      VPX_BITS_8)));
 445 #endif  // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 446 }  // namespace