granicus.if.org Git - libvpx/blob - test/dct16x16_test.cc

   1 /*
   2  *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
   3  *
   4  *  Use of this source code is governed by a BSD-style license
   5  *  that can be found in the LICENSE file in the root of the source
   6  *  tree. An additional intellectual property rights grant can be found
   7  *  in the file PATENTS.  All contributing project authors may
   8  *  be found in the AUTHORS file in the root of the source tree.
   9  */
  10
  11 #include <math.h>
  12 #include <stdlib.h>
  13 #include <string.h>
  14
  15 #include "third_party/googletest/src/include/gtest/gtest.h"
  16 #include "test/acm_random.h"
  17 #include "test/clear_system_state.h"
  18 #include "test/register_state_check.h"
  19 #include "test/util.h"
  20
  21 #include "./vp9_rtcd.h"
  22 #include "vp9/common/vp9_entropy.h"
  23 #include "vpx/vpx_codec.h"
  24 #include "vpx/vpx_integer.h"
  25 #include "vpx_ports/mem.h"
  26
  27 using libvpx_test::ACMRandom;
  28
  29 namespace {
  30
  31 #ifdef _MSC_VER
  32 static int round(double x) {
  33   if (x < 0)
  34     return static_cast<int>(ceil(x - 0.5));
  35   else
  36     return static_cast<int>(floor(x + 0.5));
  37 }
  38 #endif
  39
  40 const int kNumCoeffs = 256;
  41 const double PI = 3.1415926535898;
  42 void reference2_16x16_idct_2d(double *input, double *output) {
  43   double x;
  44   for (int l = 0; l < 16; ++l) {
  45     for (int k = 0; k < 16; ++k) {
  46       double s = 0;
  47       for (int i = 0; i < 16; ++i) {
  48         for (int j = 0; j < 16; ++j) {
  49           x = cos(PI * j * (l + 0.5) / 16.0) *
  50               cos(PI * i * (k + 0.5) / 16.0) *
  51               input[i * 16 + j] / 256;
  52           if (i != 0)
  53             x *= sqrt(2.0);
  54           if (j != 0)
  55             x *= sqrt(2.0);
  56           s += x;
  57         }
  58       }
  59       output[k*16+l] = s;
  60     }
  61   }
  62 }
  63
  64
  65 const double C1 = 0.995184726672197;
  66 const double C2 = 0.98078528040323;
  67 const double C3 = 0.956940335732209;
  68 const double C4 = 0.923879532511287;
  69 const double C5 = 0.881921264348355;
  70 const double C6 = 0.831469612302545;
  71 const double C7 = 0.773010453362737;
  72 const double C8 = 0.707106781186548;
  73 const double C9 = 0.634393284163646;
  74 const double C10 = 0.555570233019602;
  75 const double C11 = 0.471396736825998;
  76 const double C12 = 0.38268343236509;
  77 const double C13 = 0.290284677254462;
  78 const double C14 = 0.195090322016128;
  79 const double C15 = 0.098017140329561;
  80
  81 void butterfly_16x16_dct_1d(double input[16], double output[16]) {
  82   double step[16];
  83   double intermediate[16];
  84   double temp1, temp2;
  85
  86   // step 1
  87   step[ 0] = input[0] + input[15];
  88   step[ 1] = input[1] + input[14];
  89   step[ 2] = input[2] + input[13];
  90   step[ 3] = input[3] + input[12];
  91   step[ 4] = input[4] + input[11];
  92   step[ 5] = input[5] + input[10];
  93   step[ 6] = input[6] + input[ 9];
  94   step[ 7] = input[7] + input[ 8];
  95   step[ 8] = input[7] - input[ 8];
  96   step[ 9] = input[6] - input[ 9];
  97   step[10] = input[5] - input[10];
  98   step[11] = input[4] - input[11];
  99   step[12] = input[3] - input[12];
 100   step[13] = input[2] - input[13];
 101   step[14] = input[1] - input[14];
 102   step[15] = input[0] - input[15];
 103
 104   // step 2
 105   output[0] = step[0] + step[7];
 106   output[1] = step[1] + step[6];
 107   output[2] = step[2] + step[5];
 108   output[3] = step[3] + step[4];
 109   output[4] = step[3] - step[4];
 110   output[5] = step[2] - step[5];
 111   output[6] = step[1] - step[6];
 112   output[7] = step[0] - step[7];
 113
 114   temp1 = step[ 8] * C7;
 115   temp2 = step[15] * C9;
 116   output[ 8] = temp1 + temp2;
 117
 118   temp1 = step[ 9] * C11;
 119   temp2 = step[14] * C5;
 120   output[ 9] = temp1 - temp2;
 121
 122   temp1 = step[10] * C3;
 123   temp2 = step[13] * C13;
 124   output[10] = temp1 + temp2;
 125
 126   temp1 = step[11] * C15;
 127   temp2 = step[12] * C1;
 128   output[11] = temp1 - temp2;
 129
 130   temp1 = step[11] * C1;
 131   temp2 = step[12] * C15;
 132   output[12] = temp2 + temp1;
 133
 134   temp1 = step[10] * C13;
 135   temp2 = step[13] * C3;
 136   output[13] = temp2 - temp1;
 137
 138   temp1 = step[ 9] * C5;
 139   temp2 = step[14] * C11;
 140   output[14] = temp2 + temp1;
 141
 142   temp1 = step[ 8] * C9;
 143   temp2 = step[15] * C7;
 144   output[15] = temp2 - temp1;
 145
 146   // step 3
 147   step[ 0] = output[0] + output[3];
 148   step[ 1] = output[1] + output[2];
 149   step[ 2] = output[1] - output[2];
 150   step[ 3] = output[0] - output[3];
 151
 152   temp1 = output[4] * C14;
 153   temp2 = output[7] * C2;
 154   step[ 4] = temp1 + temp2;
 155
 156   temp1 = output[5] * C10;
 157   temp2 = output[6] * C6;
 158   step[ 5] = temp1 + temp2;
 159
 160   temp1 = output[5] * C6;
 161   temp2 = output[6] * C10;
 162   step[ 6] = temp2 - temp1;
 163
 164   temp1 = output[4] * C2;
 165   temp2 = output[7] * C14;
 166   step[ 7] = temp2 - temp1;
 167
 168   step[ 8] = output[ 8] + output[11];
 169   step[ 9] = output[ 9] + output[10];
 170   step[10] = output[ 9] - output[10];
 171   step[11] = output[ 8] - output[11];
 172
 173   step[12] = output[12] + output[15];
 174   step[13] = output[13] + output[14];
 175   step[14] = output[13] - output[14];
 176   step[15] = output[12] - output[15];
 177
 178   // step 4
 179   output[ 0] = (step[ 0] + step[ 1]);
 180   output[ 8] = (step[ 0] - step[ 1]);
 181
 182   temp1 = step[2] * C12;
 183   temp2 = step[3] * C4;
 184   temp1 = temp1 + temp2;
 185   output[ 4] = 2*(temp1 * C8);
 186
 187   temp1 = step[2] * C4;
 188   temp2 = step[3] * C12;
 189   temp1 = temp2 - temp1;
 190   output[12] = 2 * (temp1 * C8);
 191
 192   output[ 2] = 2 * ((step[4] + step[ 5]) * C8);
 193   output[14] = 2 * ((step[7] - step[ 6]) * C8);
 194
 195   temp1 = step[4] - step[5];
 196   temp2 = step[6] + step[7];
 197   output[ 6] = (temp1 + temp2);
 198   output[10] = (temp1 - temp2);
 199
 200   intermediate[8] = step[8] + step[14];
 201   intermediate[9] = step[9] + step[15];
 202
 203   temp1 = intermediate[8] * C12;
 204   temp2 = intermediate[9] * C4;
 205   temp1 = temp1 - temp2;
 206   output[3] = 2 * (temp1 * C8);
 207
 208   temp1 = intermediate[8] * C4;
 209   temp2 = intermediate[9] * C12;
 210   temp1 = temp2 + temp1;
 211   output[13] = 2 * (temp1 * C8);
 212
 213   output[ 9] = 2 * ((step[10] + step[11]) * C8);
 214
 215   intermediate[11] = step[10] - step[11];
 216   intermediate[12] = step[12] + step[13];
 217   intermediate[13] = step[12] - step[13];
 218   intermediate[14] = step[ 8] - step[14];
 219   intermediate[15] = step[ 9] - step[15];
 220
 221   output[15] = (intermediate[11] + intermediate[12]);
 222   output[ 1] = -(intermediate[11] - intermediate[12]);
 223
 224   output[ 7] = 2 * (intermediate[13] * C8);
 225
 226   temp1 = intermediate[14] * C12;
 227   temp2 = intermediate[15] * C4;
 228   temp1 = temp1 - temp2;
 229   output[11] = -2 * (temp1 * C8);
 230
 231   temp1 = intermediate[14] * C4;
 232   temp2 = intermediate[15] * C12;
 233   temp1 = temp2 + temp1;
 234   output[ 5] = 2 * (temp1 * C8);
 235 }
 236
 237 void reference_16x16_dct_2d(int16_t input[256], double output[256]) {
 238   // First transform columns
 239   for (int i = 0; i < 16; ++i) {
 240     double temp_in[16], temp_out[16];
 241     for (int j = 0; j < 16; ++j)
 242       temp_in[j] = input[j * 16 + i];
 243     butterfly_16x16_dct_1d(temp_in, temp_out);
 244     for (int j = 0; j < 16; ++j)
 245       output[j * 16 + i] = temp_out[j];
 246   }
 247   // Then transform rows
 248   for (int i = 0; i < 16; ++i) {
 249     double temp_in[16], temp_out[16];
 250     for (int j = 0; j < 16; ++j)
 251       temp_in[j] = output[j + i * 16];
 252     butterfly_16x16_dct_1d(temp_in, temp_out);
 253     // Scale by some magic number
 254     for (int j = 0; j < 16; ++j)
 255       output[j + i * 16] = temp_out[j]/2;
 256   }
 257 }
 258
// Function-pointer shapes for the transforms exercised below. The "Fdct" and
// "Idct" forms take no transform type; the "Fht" and "Iht" forms carry an
// extra tx_type argument.
typedef void (*FdctFunc)(const int16_t *in, tran_low_t *out, int stride);
typedef void (*IdctFunc)(const tran_low_t *in, uint8_t *out, int stride);
typedef void (*FhtFunc)(const int16_t *in, tran_low_t *out, int stride,
                        int tx_type);
typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
                        int tx_type);

// Test parameter tuples. For Dct16x16Param and Ht16x16Param the fields are
// (forward function, inverse function, tx_type, bit depth). For
// Idct16x16Param they are (reference inverse, inverse under test, coefficient
// threshold, bit depth), as read by the fixtures' SetUp() methods.
typedef std::tr1::tuple<FdctFunc, IdctFunc, int, vpx_bit_depth_t> Dct16x16Param;
typedef std::tr1::tuple<FhtFunc, IhtFunc, int, vpx_bit_depth_t> Ht16x16Param;
typedef std::tr1::tuple<IdctFunc, IdctFunc, int, vpx_bit_depth_t>
    Idct16x16Param;
 270
// Reference adapters with the FhtFunc/IhtFunc signatures, so the shared test
// code can call a C reference the same way for both DCT and hybrid tests.

// Forwards to vp9_fdct16x16_c; the transform type is ignored.
void fdct16x16_ref(const int16_t *in, tran_low_t *out, int stride,
                   int /*tx_type*/) {
  vp9_fdct16x16_c(in, out, stride);
}

// Forwards to vp9_idct16x16_256_add_c; the transform type is ignored.
void idct16x16_ref(const tran_low_t *in, uint8_t *dest, int stride,
                   int /*tx_type*/) {
  vp9_idct16x16_256_add_c(in, dest, stride);
}

// Forwards to vp9_fht16x16_c, passing tx_type through.
void fht16x16_ref(const int16_t *in, tran_low_t *out, int stride,
                  int tx_type) {
  vp9_fht16x16_c(in, out, stride, tx_type);
}

// Forwards to vp9_iht16x16_256_add_c, passing tx_type through.
void iht16x16_ref(const tran_low_t *in, uint8_t *dest, int stride,
                  int tx_type) {
  vp9_iht16x16_256_add_c(in, dest, stride, tx_type);
}
 290
 291 #if CONFIG_VP9_HIGHBITDEPTH
 292 void idct16x16_10(const tran_low_t *in, uint8_t *out, int stride) {
 293   vp9_highbd_idct16x16_256_add_c(in, out, stride, 10);
 294 }
 295
 296 void idct16x16_12(const tran_low_t *in, uint8_t *out, int stride) {
 297   vp9_highbd_idct16x16_256_add_c(in, out, stride, 12);
 298 }
 299
 300 void idct16x16_10_ref(const tran_low_t *in, uint8_t *out, int stride,
 301                       int tx_type) {
 302   idct16x16_10(in, out, stride);
 303 }
 304
 305 void idct16x16_12_ref(const tran_low_t *in, uint8_t *out, int stride,
 306                       int tx_type) {
 307   idct16x16_12(in, out, stride);
 308 }
 309
 310 void iht16x16_10(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
 311   vp9_highbd_iht16x16_256_add_c(in, out, stride, tx_type, 10);
 312 }
 313
 314 void iht16x16_12(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
 315   vp9_highbd_iht16x16_256_add_c(in, out, stride, tx_type, 12);
 316 }
 317
 318 void idct16x16_10_add_10_c(const tran_low_t *in, uint8_t *out, int stride) {
 319   vp9_highbd_idct16x16_10_add_c(in, out, stride, 10);
 320 }
 321
 322 void idct16x16_10_add_12_c(const tran_low_t *in, uint8_t *out, int stride) {
 323   vp9_highbd_idct16x16_10_add_c(in, out, stride, 12);
 324 }
 325
 326 #if HAVE_SSE2
 327 void idct16x16_256_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
 328   vp9_highbd_idct16x16_256_add_sse2(in, out, stride, 10);
 329 }
 330
 331 void idct16x16_256_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
 332   vp9_highbd_idct16x16_256_add_sse2(in, out, stride, 12);
 333 }
 334
 335 void idct16x16_10_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
 336   vp9_highbd_idct16x16_10_add_sse2(in, out, stride, 10);
 337 }
 338
 339 void idct16x16_10_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
 340   vp9_highbd_idct16x16_10_add_sse2(in, out, stride, 12);
 341 }
 342 #endif  // HAVE_SSE2
 343 #endif  // CONFIG_VP9_HIGHBITDEPTH
 344
 345 class Trans16x16TestBase {
 346  public:
 347   virtual ~Trans16x16TestBase() {}
 348
 349  protected:
 350   virtual void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) = 0;
 351
 352   virtual void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) = 0;
 353
 354   void RunAccuracyCheck() {
 355     ACMRandom rnd(ACMRandom::DeterministicSeed());
 356     uint32_t max_error = 0;
 357     int64_t total_error = 0;
 358     const int count_test_block = 10000;
 359     for (int i = 0; i < count_test_block; ++i) {
 360       DECLARE_ALIGNED(16, int16_t, test_input_block[kNumCoeffs]);
 361       DECLARE_ALIGNED(16, tran_low_t, test_temp_block[kNumCoeffs]);
 362       DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
 363       DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
 364 #if CONFIG_VP9_HIGHBITDEPTH
 365       DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
 366       DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
 367 #endif
 368
 369       // Initialize a test block with input range [-mask_, mask_].
 370       for (int j = 0; j < kNumCoeffs; ++j) {
 371         if (bit_depth_ == VPX_BITS_8) {
 372           src[j] = rnd.Rand8();
 373           dst[j] = rnd.Rand8();
 374           test_input_block[j] = src[j] - dst[j];
 375 #if CONFIG_VP9_HIGHBITDEPTH
 376         } else {
 377           src16[j] = rnd.Rand16() & mask_;
 378           dst16[j] = rnd.Rand16() & mask_;
 379           test_input_block[j] = src16[j] - dst16[j];
 380 #endif
 381         }
 382       }
 383
 384       ASM_REGISTER_STATE_CHECK(RunFwdTxfm(test_input_block,
 385                                           test_temp_block, pitch_));
 386       if (bit_depth_ == VPX_BITS_8) {
 387         ASM_REGISTER_STATE_CHECK(
 388             RunInvTxfm(test_temp_block, dst, pitch_));
 389 #if CONFIG_VP9_HIGHBITDEPTH
 390       } else {
 391         ASM_REGISTER_STATE_CHECK(
 392             RunInvTxfm(test_temp_block, CONVERT_TO_BYTEPTR(dst16), pitch_));
 393 #endif
 394       }
 395
 396       for (int j = 0; j < kNumCoeffs; ++j) {
 397 #if CONFIG_VP9_HIGHBITDEPTH
 398         const uint32_t diff =
 399             bit_depth_ == VPX_BITS_8 ?  dst[j] - src[j] : dst16[j] - src16[j];
 400 #else
 401         const uint32_t diff = dst[j] - src[j];
 402 #endif
 403         const uint32_t error = diff * diff;
 404         if (max_error < error)
 405           max_error = error;
 406         total_error += error;
 407       }
 408     }
 409
 410     EXPECT_GE(1u  << 2 * (bit_depth_ - 8), max_error)
 411         << "Error: 16x16 FHT/IHT has an individual round trip error > 1";
 412
 413     EXPECT_GE(count_test_block << 2 * (bit_depth_ - 8), total_error)
 414         << "Error: 16x16 FHT/IHT has average round trip error > 1 per block";
 415   }
 416
 417   void RunCoeffCheck() {
 418     ACMRandom rnd(ACMRandom::DeterministicSeed());
 419     const int count_test_block = 1000;
 420     DECLARE_ALIGNED(16, int16_t, input_block[kNumCoeffs]);
 421     DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
 422     DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);
 423
 424     for (int i = 0; i < count_test_block; ++i) {
 425       // Initialize a test block with input range [-mask_, mask_].
 426       for (int j = 0; j < kNumCoeffs; ++j)
 427         input_block[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
 428
 429       fwd_txfm_ref(input_block, output_ref_block, pitch_, tx_type_);
 430       ASM_REGISTER_STATE_CHECK(RunFwdTxfm(input_block, output_block, pitch_));
 431
 432       // The minimum quant value is 4.
 433       for (int j = 0; j < kNumCoeffs; ++j)
 434         EXPECT_EQ(output_block[j], output_ref_block[j]);
 435     }
 436   }
 437
 438   void RunMemCheck() {
 439     ACMRandom rnd(ACMRandom::DeterministicSeed());
 440     const int count_test_block = 1000;
 441     DECLARE_ALIGNED(16, int16_t, input_extreme_block[kNumCoeffs]);
 442     DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
 443     DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);
 444
 445     for (int i = 0; i < count_test_block; ++i) {
 446       // Initialize a test block with input range [-mask_, mask_].
 447       for (int j = 0; j < kNumCoeffs; ++j) {
 448         input_extreme_block[j] = rnd.Rand8() % 2 ? mask_ : -mask_;
 449       }
 450       if (i == 0) {
 451         for (int j = 0; j < kNumCoeffs; ++j)
 452           input_extreme_block[j] = mask_;
 453       } else if (i == 1) {
 454         for (int j = 0; j < kNumCoeffs; ++j)
 455           input_extreme_block[j] = -mask_;
 456       }
 457
 458       fwd_txfm_ref(input_extreme_block, output_ref_block, pitch_, tx_type_);
 459       ASM_REGISTER_STATE_CHECK(RunFwdTxfm(input_extreme_block,
 460                                           output_block, pitch_));
 461
 462       // The minimum quant value is 4.
 463       for (int j = 0; j < kNumCoeffs; ++j) {
 464         EXPECT_EQ(output_block[j], output_ref_block[j]);
 465         EXPECT_GE(4 * DCT_MAX_VALUE << (bit_depth_ - 8), abs(output_block[j]))
 466             << "Error: 16x16 FDCT has coefficient larger than 4*DCT_MAX_VALUE";
 467       }
 468     }
 469   }
 470
 471   void RunQuantCheck(int dc_thred, int ac_thred) {
 472     ACMRandom rnd(ACMRandom::DeterministicSeed());
 473     const int count_test_block = 100000;
 474     DECLARE_ALIGNED(16, int16_t, input_extreme_block[kNumCoeffs]);
 475     DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
 476
 477     DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
 478     DECLARE_ALIGNED(16, uint8_t, ref[kNumCoeffs]);
 479 #if CONFIG_VP9_HIGHBITDEPTH
 480     DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
 481     DECLARE_ALIGNED(16, uint16_t, ref16[kNumCoeffs]);
 482 #endif
 483
 484     for (int i = 0; i < count_test_block; ++i) {
 485       // Initialize a test block with input range [-mask_, mask_].
 486       for (int j = 0; j < kNumCoeffs; ++j) {
 487         input_extreme_block[j] = rnd.Rand8() % 2 ? mask_ : -mask_;
 488       }
 489       if (i == 0)
 490         for (int j = 0; j < kNumCoeffs; ++j)
 491           input_extreme_block[j] = mask_;
 492       if (i == 1)
 493         for (int j = 0; j < kNumCoeffs; ++j)
 494           input_extreme_block[j] = -mask_;
 495
 496       fwd_txfm_ref(input_extreme_block, output_ref_block, pitch_, tx_type_);
 497
 498       // clear reconstructed pixel buffers
 499       memset(dst, 0, kNumCoeffs * sizeof(uint8_t));
 500       memset(ref, 0, kNumCoeffs * sizeof(uint8_t));
 501 #if CONFIG_VP9_HIGHBITDEPTH
 502       memset(dst16, 0, kNumCoeffs * sizeof(uint16_t));
 503       memset(ref16, 0, kNumCoeffs * sizeof(uint16_t));
 504 #endif
 505
 506       // quantization with maximum allowed step sizes
 507       output_ref_block[0] = (output_ref_block[0] / dc_thred) * dc_thred;
 508       for (int j = 1; j < kNumCoeffs; ++j)
 509         output_ref_block[j] = (output_ref_block[j] / ac_thred) * ac_thred;
 510       if (bit_depth_ == VPX_BITS_8) {
 511         inv_txfm_ref(output_ref_block, ref, pitch_, tx_type_);
 512         ASM_REGISTER_STATE_CHECK(RunInvTxfm(output_ref_block, dst, pitch_));
 513 #if CONFIG_VP9_HIGHBITDEPTH
 514       } else {
 515         inv_txfm_ref(output_ref_block, CONVERT_TO_BYTEPTR(ref16), pitch_,
 516                      tx_type_);
 517         ASM_REGISTER_STATE_CHECK(RunInvTxfm(output_ref_block,
 518                                             CONVERT_TO_BYTEPTR(dst16), pitch_));
 519 #endif
 520       }
 521       if (bit_depth_ == VPX_BITS_8) {
 522         for (int j = 0; j < kNumCoeffs; ++j)
 523           EXPECT_EQ(ref[j], dst[j]);
 524 #if CONFIG_VP9_HIGHBITDEPTH
 525       } else {
 526         for (int j = 0; j < kNumCoeffs; ++j)
 527           EXPECT_EQ(ref16[j], dst16[j]);
 528 #endif
 529       }
 530     }
 531   }
 532
 533   void RunInvAccuracyCheck() {
 534     ACMRandom rnd(ACMRandom::DeterministicSeed());
 535     const int count_test_block = 1000;
 536     DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]);
 537     DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
 538     DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
 539     DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
 540 #if CONFIG_VP9_HIGHBITDEPTH
 541     DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
 542     DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
 543 #endif  // CONFIG_VP9_HIGHBITDEPTH
 544
 545     for (int i = 0; i < count_test_block; ++i) {
 546       double out_r[kNumCoeffs];
 547
 548       // Initialize a test block with input range [-255, 255].
 549       for (int j = 0; j < kNumCoeffs; ++j) {
 550         if (bit_depth_ == VPX_BITS_8) {
 551           src[j] = rnd.Rand8();
 552           dst[j] = rnd.Rand8();
 553           in[j] = src[j] - dst[j];
 554 #if CONFIG_VP9_HIGHBITDEPTH
 555         } else {
 556           src16[j] = rnd.Rand16() & mask_;
 557           dst16[j] = rnd.Rand16() & mask_;
 558           in[j] = src16[j] - dst16[j];
 559 #endif  // CONFIG_VP9_HIGHBITDEPTH
 560         }
 561       }
 562
 563       reference_16x16_dct_2d(in, out_r);
 564       for (int j = 0; j < kNumCoeffs; ++j)
 565         coeff[j] = static_cast<tran_low_t>(round(out_r[j]));
 566
 567       if (bit_depth_ == VPX_BITS_8) {
 568         ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, 16));
 569 #if CONFIG_VP9_HIGHBITDEPTH
 570       } else {
 571         ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16),
 572                                             16));
 573 #endif  // CONFIG_VP9_HIGHBITDEPTH
 574       }
 575
 576       for (int j = 0; j < kNumCoeffs; ++j) {
 577 #if CONFIG_VP9_HIGHBITDEPTH
 578         const uint32_t diff =
 579             bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
 580 #else
 581         const uint32_t diff = dst[j] - src[j];
 582 #endif  // CONFIG_VP9_HIGHBITDEPTH
 583         const uint32_t error = diff * diff;
 584         EXPECT_GE(1u, error)
 585             << "Error: 16x16 IDCT has error " << error
 586             << " at index " << j;
 587       }
 588     }
 589   }
 590
 591   void CompareInvReference(IdctFunc ref_txfm, int thresh) {
 592     ACMRandom rnd(ACMRandom::DeterministicSeed());
 593     const int count_test_block = 10000;
 594     const int eob = 10;
 595     const int16_t *scan = vp9_default_scan_orders[TX_16X16].scan;
 596     DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
 597     DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
 598     DECLARE_ALIGNED(16, uint8_t, ref[kNumCoeffs]);
 599 #if CONFIG_VP9_HIGHBITDEPTH
 600     DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
 601     DECLARE_ALIGNED(16, uint16_t, ref16[kNumCoeffs]);
 602 #endif  // CONFIG_VP9_HIGHBITDEPTH
 603
 604     for (int i = 0; i < count_test_block; ++i) {
 605       for (int j = 0; j < kNumCoeffs; ++j) {
 606         if (j < eob) {
 607           // Random values less than the threshold, either positive or negative
 608           coeff[scan[j]] = rnd(thresh) * (1 - 2 * (i % 2));
 609         } else {
 610           coeff[scan[j]] = 0;
 611         }
 612         if (bit_depth_ == VPX_BITS_8) {
 613           dst[j] = 0;
 614           ref[j] = 0;
 615 #if CONFIG_VP9_HIGHBITDEPTH
 616         } else {
 617           dst16[j] = 0;
 618           ref16[j] = 0;
 619 #endif  // CONFIG_VP9_HIGHBITDEPTH
 620         }
 621       }
 622       if (bit_depth_ == VPX_BITS_8) {
 623         ref_txfm(coeff, ref, pitch_);
 624         ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_));
 625       } else {
 626 #if CONFIG_VP9_HIGHBITDEPTH
 627         ref_txfm(coeff, CONVERT_TO_BYTEPTR(ref16), pitch_);
 628         ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16),
 629                                  pitch_));
 630 #endif  // CONFIG_VP9_HIGHBITDEPTH
 631       }
 632
 633       for (int j = 0; j < kNumCoeffs; ++j) {
 634 #if CONFIG_VP9_HIGHBITDEPTH
 635         const uint32_t diff =
 636             bit_depth_ == VPX_BITS_8 ? dst[j] - ref[j] : dst16[j] - ref16[j];
 637 #else
 638         const uint32_t diff = dst[j] - ref[j];
 639 #endif  // CONFIG_VP9_HIGHBITDEPTH
 640         const uint32_t error = diff * diff;
 641         EXPECT_EQ(0u, error)
 642             << "Error: 16x16 IDCT Comparison has error " << error
 643             << " at index " << j;
 644       }
 645     }
 646   }
 647
 648   int pitch_;
 649   int tx_type_;
 650   vpx_bit_depth_t bit_depth_;
 651   int mask_;
 652   FhtFunc fwd_txfm_ref;
 653   IhtFunc inv_txfm_ref;
 654 };
 655
 656 class Trans16x16DCT
 657     : public Trans16x16TestBase,
 658       public ::testing::TestWithParam<Dct16x16Param> {
 659  public:
 660   virtual ~Trans16x16DCT() {}
 661
 662   virtual void SetUp() {
 663     fwd_txfm_ = GET_PARAM(0);
 664     inv_txfm_ = GET_PARAM(1);
 665     tx_type_  = GET_PARAM(2);
 666     bit_depth_ = GET_PARAM(3);
 667     pitch_    = 16;
 668     fwd_txfm_ref = fdct16x16_ref;
 669     inv_txfm_ref = idct16x16_ref;
 670     mask_ = (1 << bit_depth_) - 1;
 671 #if CONFIG_VP9_HIGHBITDEPTH
 672     switch (bit_depth_) {
 673       case VPX_BITS_10:
 674         inv_txfm_ref = idct16x16_10_ref;
 675         break;
 676       case VPX_BITS_12:
 677         inv_txfm_ref = idct16x16_12_ref;
 678         break;
 679       default:
 680         inv_txfm_ref = idct16x16_ref;
 681         break;
 682     }
 683 #else
 684     inv_txfm_ref = idct16x16_ref;
 685 #endif
 686   }
 687   virtual void TearDown() { libvpx_test::ClearSystemState(); }
 688
 689  protected:
 690   void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) {
 691     fwd_txfm_(in, out, stride);
 692   }
 693   void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
 694     inv_txfm_(out, dst, stride);
 695   }
 696
 697   FdctFunc fwd_txfm_;
 698   IdctFunc inv_txfm_;
 699 };
 700
// Each test below runs one shared check from Trans16x16TestBase against the
// function pair supplied by the current Trans16x16DCT test parameter.

// Forward + inverse round-trip error stays within the allowed bounds.
TEST_P(Trans16x16DCT, AccuracyCheck) {
  RunAccuracyCheck();
}

// Forward transform matches the reference on random input.
TEST_P(Trans16x16DCT, CoeffCheck) {
  RunCoeffCheck();
}

// Forward transform matches the reference, and stays in range, on
// extreme-valued input.
TEST_P(Trans16x16DCT, MemCheck) {
  RunMemCheck();
}

TEST_P(Trans16x16DCT, QuantCheck) {
  // Use maximally allowed quantization step sizes for DC and AC
  // coefficients respectively.
  RunQuantCheck(1336, 1828);
}

// Inverse transform reconstructs from double-precision reference coefficients.
TEST_P(Trans16x16DCT, InvAccuracyCheck) {
  RunInvAccuracyCheck();
}
 722
 723 class Trans16x16HT
 724     : public Trans16x16TestBase,
 725       public ::testing::TestWithParam<Ht16x16Param> {
 726  public:
 727   virtual ~Trans16x16HT() {}
 728
 729   virtual void SetUp() {
 730     fwd_txfm_ = GET_PARAM(0);
 731     inv_txfm_ = GET_PARAM(1);
 732     tx_type_  = GET_PARAM(2);
 733     bit_depth_ = GET_PARAM(3);
 734     pitch_    = 16;
 735     fwd_txfm_ref = fht16x16_ref;
 736     inv_txfm_ref = iht16x16_ref;
 737     mask_ = (1 << bit_depth_) - 1;
 738 #if CONFIG_VP9_HIGHBITDEPTH
 739     switch (bit_depth_) {
 740       case VPX_BITS_10:
 741         inv_txfm_ref = iht16x16_10;
 742         break;
 743       case VPX_BITS_12:
 744         inv_txfm_ref = iht16x16_12;
 745         break;
 746       default:
 747         inv_txfm_ref = iht16x16_ref;
 748         break;
 749     }
 750 #else
 751     inv_txfm_ref = iht16x16_ref;
 752 #endif
 753   }
 754   virtual void TearDown() { libvpx_test::ClearSystemState(); }
 755
 756  protected:
 757   void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) {
 758     fwd_txfm_(in, out, stride, tx_type_);
 759   }
 760   void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
 761     inv_txfm_(out, dst, stride, tx_type_);
 762   }
 763
 764   FhtFunc fwd_txfm_;
 765   IhtFunc inv_txfm_;
 766 };
 767
// Each test below runs one shared check from Trans16x16TestBase against the
// function pair and tx_type supplied by the current Trans16x16HT parameter.

// Forward + inverse round-trip error stays within the allowed bounds.
TEST_P(Trans16x16HT, AccuracyCheck) {
  RunAccuracyCheck();
}

// Forward transform matches the reference on random input.
TEST_P(Trans16x16HT, CoeffCheck) {
  RunCoeffCheck();
}

// Forward transform matches the reference, and stays in range, on
// extreme-valued input.
TEST_P(Trans16x16HT, MemCheck) {
  RunMemCheck();
}

TEST_P(Trans16x16HT, QuantCheck) {
  // The encoder skips any non-DC intra prediction modes,
  // when the quantization step size goes beyond 988.
  // NOTE(review): the thresholds passed below (429 for DC, 729 for AC) are
  // not derived anywhere in this file -- confirm against the encoder.
  RunQuantCheck(429, 729);
}
 785
 786 class InvTrans16x16DCT
 787     : public Trans16x16TestBase,
 788       public ::testing::TestWithParam<Idct16x16Param> {
 789  public:
 790   virtual ~InvTrans16x16DCT() {}
 791
 792   virtual void SetUp() {
 793     ref_txfm_ = GET_PARAM(0);
 794     inv_txfm_ = GET_PARAM(1);
 795     thresh_ = GET_PARAM(2);
 796     bit_depth_ = GET_PARAM(3);
 797     pitch_ = 16;
 798     mask_ = (1 << bit_depth_) - 1;
 799 }
 800   virtual void TearDown() { libvpx_test::ClearSystemState(); }
 801
 802  protected:
 803   void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) {}
 804   void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
 805     inv_txfm_(out, dst, stride);
 806   }
 807
 808   IdctFunc ref_txfm_;
 809   IdctFunc inv_txfm_;
 810   int thresh_;
 811 };
 812
// The inverse transform under test must reproduce the reference inverse
// transform exactly on sparse coefficient blocks bounded by thresh_.
TEST_P(InvTrans16x16DCT, CompareReference) {
  CompareInvReference(ref_txfm_, thresh_);
}
 816
using std::tr1::make_tuple;

// "C" instantiations: plain C implementations for both the forward and the
// inverse function. High-bitdepth builds add 10- and 12-bit entries ahead of
// the 8-bit one. Tuple layout: (forward, inverse, tx_type, bit depth).
#if CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
    C, Trans16x16DCT,
    ::testing::Values(
        make_tuple(&vp9_highbd_fdct16x16_c, &idct16x16_10, 0, VPX_BITS_10),
        make_tuple(&vp9_highbd_fdct16x16_c, &idct16x16_12, 0, VPX_BITS_12),
        make_tuple(&vp9_fdct16x16_c, &vp9_idct16x16_256_add_c, 0, VPX_BITS_8)));
#else
INSTANTIATE_TEST_CASE_P(
    C, Trans16x16DCT,
    ::testing::Values(
        make_tuple(&vp9_fdct16x16_c, &vp9_idct16x16_256_add_c, 0, VPX_BITS_8)));
#endif  // CONFIG_VP9_HIGHBITDEPTH

// Hybrid transform: one entry per tx_type value 0..3 for each bit depth.
#if CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
    C, Trans16x16HT,
    ::testing::Values(
        make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_10, 0, VPX_BITS_10),
        make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_10, 1, VPX_BITS_10),
        make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_10, 2, VPX_BITS_10),
        make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_10, 3, VPX_BITS_10),
        make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_12, 0, VPX_BITS_12),
        make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_12, 1, VPX_BITS_12),
        make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_12, 2, VPX_BITS_12),
        make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_12, 3, VPX_BITS_12),
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 0, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 1, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 2, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 3, VPX_BITS_8)));
#else
INSTANTIATE_TEST_CASE_P(
    C, Trans16x16HT,
    ::testing::Values(
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 0, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 1, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 2, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 3, VPX_BITS_8)));
#endif  // CONFIG_VP9_HIGHBITDEPTH
 858
// NEON: pairs the C forward DCT with the NEON inverse DCT. Only built when
// high bitdepth and hardware emulation are both disabled.
#if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
    NEON, Trans16x16DCT,
    ::testing::Values(
        make_tuple(&vp9_fdct16x16_c,
                   &vp9_idct16x16_256_add_neon, 0, VPX_BITS_8)));
#endif

// SSE2, 8-bit only builds: SSE2 forward and inverse functions for both the
// DCT and every hybrid tx_type 0..3.
#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
    SSE2, Trans16x16DCT,
    ::testing::Values(
        make_tuple(&vp9_fdct16x16_sse2,
                   &vp9_idct16x16_256_add_sse2, 0, VPX_BITS_8)));
INSTANTIATE_TEST_CASE_P(
    SSE2, Trans16x16HT,
    ::testing::Values(
        make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 0,
                   VPX_BITS_8),
        make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 1,
                   VPX_BITS_8),
        make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 2,
                   VPX_BITS_8),
        make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 3,
                   VPX_BITS_8)));
#endif  // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 885
// SSE2, high-bitdepth builds. In the DCT list each entry pairs one SSE2
// function with a C counterpart (SSE2 forward + C inverse, or C forward +
// SSE2 inverse). The hybrid list always pairs the SSE2 forward transform with
// a C inverse.
#if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
    SSE2, Trans16x16DCT,
    ::testing::Values(
        make_tuple(&vp9_highbd_fdct16x16_sse2,
                   &idct16x16_10, 0, VPX_BITS_10),
        make_tuple(&vp9_highbd_fdct16x16_c,
                   &idct16x16_256_add_10_sse2, 0, VPX_BITS_10),
        make_tuple(&vp9_highbd_fdct16x16_sse2,
                   &idct16x16_12, 0, VPX_BITS_12),
        make_tuple(&vp9_highbd_fdct16x16_c,
                   &idct16x16_256_add_12_sse2, 0, VPX_BITS_12),
        make_tuple(&vp9_fdct16x16_sse2,
                   &vp9_idct16x16_256_add_c, 0, VPX_BITS_8)));
INSTANTIATE_TEST_CASE_P(
    SSE2, Trans16x16HT,
    ::testing::Values(
        make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_10, 0, VPX_BITS_10),
        make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_10, 1, VPX_BITS_10),
        make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_10, 2, VPX_BITS_10),
        make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_10, 3, VPX_BITS_10),
        make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_12, 0, VPX_BITS_12),
        make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_12, 1, VPX_BITS_12),
        make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_12, 2, VPX_BITS_12),
        make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_12, 3, VPX_BITS_12),
        make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_c, 0, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_c, 1, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_c, 2, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_c, 3,
                   VPX_BITS_8)));
// Optimizations take effect at a threshold of 3155, so we use a value close to
// that to test both branches.
// Tuple layout here: (reference inverse, SSE2 inverse under test, coefficient
// threshold, bit depth).
INSTANTIATE_TEST_CASE_P(
    SSE2, InvTrans16x16DCT,
    ::testing::Values(
        make_tuple(&idct16x16_10_add_10_c,
                   &idct16x16_10_add_10_sse2, 3167, VPX_BITS_10),
        make_tuple(&idct16x16_10,
                   &idct16x16_256_add_10_sse2, 3167, VPX_BITS_10),
        make_tuple(&idct16x16_10_add_12_c,
                   &idct16x16_10_add_12_sse2, 3167, VPX_BITS_12),
        make_tuple(&idct16x16_12,
                   &idct16x16_256_add_12_sse2, 3167, VPX_BITS_12)));
#endif  // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 930
// MSA instantiations, currently compiled out with "#if 0"; the intended guard
// is kept in the trailing comment on the #if line.
#if 0  // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
// TODO(parag): enable when function hooks are added
INSTANTIATE_TEST_CASE_P(
    MSA, Trans16x16DCT,
    ::testing::Values(
        make_tuple(&vp9_fdct16x16_c,
                   &vp9_idct16x16_256_add_msa, 0, VPX_BITS_8)));
INSTANTIATE_TEST_CASE_P(
    MSA, Trans16x16HT,
    ::testing::Values(
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_msa, 0, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_msa, 1, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_msa, 2, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_msa, 3, VPX_BITS_8)));
#endif  // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 946 }  // namespace