Bench Class For More Robust Speed Tests

author Luc Trudeau <luc@trud.ca>

Tue, 22 May 2018 18:16:15 +0000 (14:16 -0400)

committer Luc Trudeau <luc@trud.ca>

Tue, 29 May 2018 13:04:47 +0000 (13:04 +0000)
author Luc Trudeau <luc@trud.ca>
Tue, 22 May 2018 18:16:15 +0000 (14:16 -0400)
committer Luc Trudeau <luc@trud.ca>
Tue, 29 May 2018 13:04:47 +0000 (13:04 +0000)
diff --git a/test/bench.cc b/test/bench.cc

new file mode 100644 (file)

index 0000000..281b741
--- /dev/null
+++ b/test/bench.cc
@@ -0,0 +1,38 @@
+/*
+ *  Copyright (c) 2018 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdio.h>
+#include <algorithm>
+
+#include "test/bench.h"
+#include "vpx_ports/vpx_timer.h"
+
+void AbstractBench::runNTimes(int n) {
+  for (int r = 0; r < VPX_BENCH_ROBUST_ITER; r++) {
+    vpx_usec_timer timer;
+    vpx_usec_timer_start(&timer);
+    for (int j = 0; j < n; ++j) {
+      run();
+    }
+    vpx_usec_timer_mark(&timer);
+    times[r] = static_cast<int>(vpx_usec_timer_elapsed(&timer));
+  }
+}
+
+void AbstractBench::printMedian(const char *title) {
+  std::sort(times, times + VPX_BENCH_ROBUST_ITER);
+  const int med = times[VPX_BENCH_ROBUST_ITER >> 1];
+  int sad = 0;
+  for (int t = 0; t < VPX_BENCH_ROBUST_ITER; t++) {
+    sad += abs(times[t] - med);
+  }
+  printf("[%10s] %s %.1f ms ( ±%.1f ms )\n", "BENCH ", title, med / 1000.0,
+         sad / (VPX_BENCH_ROBUST_ITER * 1000.0));
+}
diff --git a/test/bench.h b/test/bench.h

new file mode 100644 (file)

index 0000000..0b0cf10
--- /dev/null
+++ b/test/bench.h
@@ -0,0 +1,30 @@
+/*
+ *  Copyright (c) 2018 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef TEST_BENCH_H_
+#define TEST_BENCH_H_
+
+// Number of iterations used to compute median run time.
+#define VPX_BENCH_ROBUST_ITER 15
+
+class AbstractBench {
+ public:
+  void runNTimes(int n);
+  void printMedian(const char *title);
+
+ protected:
+  // Implement this method and put the code to benchmark in it.
+  virtual void run() = 0;
+
+ private:
+  int times[VPX_BENCH_ROBUST_ITER];
+};
+
+#endif  // TEST_BENCH_H_
diff --git a/test/test.mk b/test/test.mk

index 3e5739e21c9fc9d497d87a7fafa506205ca96175..224ac4e8f2bdf318b500ab2228a9819118b753d7 100644 (file)
--- a/test/test.mk
+++ b/test/test.mk
@@ -1,4 +1,6 @@
  LIBVPX_TEST_SRCS-yes += acm_random.h
+LIBVPX_TEST_SRCS-yes += bench.h
+LIBVPX_TEST_SRCS-yes += bench.cc
  LIBVPX_TEST_SRCS-yes += buffer.h
  LIBVPX_TEST_SRCS-yes += clear_system_state.h
  LIBVPX_TEST_SRCS-yes += codec_factory.h
diff --git a/test/vp9_quantize_test.cc b/test/vp9_quantize_test.cc

index 33932ff95bdbcc48833f4df2f6624d26ae3dcf50..c39267faaa414ad7909a1f2d08541bcf7d5489a8 100644 (file)
--- a/test/vp9_quantize_test.cc
+++ b/test/vp9_quantize_test.cc
@@ -18,6 +18,7 @@
  #include "./vpx_config.h"
  #include "./vpx_dsp_rtcd.h"
  #include "test/acm_random.h"
+#include "test/bench.h"
  #include "test/buffer.h"
  #include "test/clear_system_state.h"
  #include "test/register_state_check.h"
@@ -67,10 +68,13 @@ void QuantFPWrapper(const tran_low_t *coeff, intptr_t count, int skip_block,
       scan, iscan);
  }
  
-class VP9QuantizeBase {
+class VP9QuantizeBase : public AbstractBench {
   public:
    VP9QuantizeBase(vpx_bit_depth_t bit_depth, int max_size, bool is_fp)
-      : bit_depth_(bit_depth), max_size_(max_size), is_fp_(is_fp) {
+      : bit_depth_(bit_depth), max_size_(max_size), is_fp_(is_fp),
+        coeff(Buffer<tran_low_t>(max_size_, max_size_, 0, 16)),
+        qcoeff(Buffer<tran_low_t>(max_size_, max_size_, 0, 32)),
+        dqcoeff(Buffer<tran_low_t>(max_size_, max_size_, 0, 32)) {
      max_value_ = (1 << bit_depth_) - 1;
      zbin_ptr_ =
          reinterpret_cast<int16_t *>(vpx_memalign(16, 8 * sizeof(*zbin_ptr_)));
@@ -86,6 +90,9 @@ class VP9QuantizeBase {
          vpx_memalign(16, 8 * sizeof(*quant_shift_ptr_)));
      dequant_ptr_ = reinterpret_cast<int16_t *>(
          vpx_memalign(16, 8 * sizeof(*dequant_ptr_)));
+
+    r_ptr = (is_fp_) ? round_fp_ptr_ : round_ptr_;
+    q_ptr = (is_fp_) ? quant_fp_ptr_ : quant_ptr_;
    }
  
    ~VP9QuantizeBase() {
@@ -118,6 +125,15 @@ class VP9QuantizeBase {
    int max_value_;
    const int max_size_;
    const bool is_fp_;
+  Buffer<tran_low_t> coeff;
+  Buffer<tran_low_t> qcoeff;
+  Buffer<tran_low_t> dqcoeff;
+  int16_t *r_ptr;
+  int16_t *q_ptr;
+  int count;
+  int skip_block;
+  const scan_order *scan;
+  uint16_t eob;
  };
  
  class VP9QuantizeTest : public VP9QuantizeBase,
@@ -128,10 +144,17 @@ class VP9QuantizeTest : public VP9QuantizeBase,
          quantize_op_(GET_PARAM(0)), ref_quantize_op_(GET_PARAM(1)) {}
  
   protected:
+  void run();
    const QuantizeFunc quantize_op_;
    const QuantizeFunc ref_quantize_op_;
  };
  
+void VP9QuantizeTest::run() {
+  quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr, q_ptr,
+               quant_shift_ptr_, qcoeff.TopLeftPixel(), dqcoeff.TopLeftPixel(),
+               dequant_ptr_, &eob, scan->scan, scan->iscan);
+}
+
  // This quantizer compares the AC coefficients to the quantization step size to
  // determine if further multiplication operations are needed.
  // Based on vp9_quantize_fp_sse2().
@@ -269,11 +292,8 @@ void GenerateHelperArrays(ACMRandom *rnd, int16_t *zbin, int16_t *round,
  
  TEST_P(VP9QuantizeTest, OperationCheck) {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
-  Buffer<tran_low_t> coeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 16);
    ASSERT_TRUE(coeff.Init());
-  Buffer<tran_low_t> qcoeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
    ASSERT_TRUE(qcoeff.Init());
-  Buffer<tran_low_t> dqcoeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
    ASSERT_TRUE(dqcoeff.Init());
    Buffer<tran_low_t> ref_qcoeff =
        Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
@@ -281,7 +301,8 @@ TEST_P(VP9QuantizeTest, OperationCheck) {
    Buffer<tran_low_t> ref_dqcoeff =
        Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
    ASSERT_TRUE(ref_dqcoeff.Init());
-  uint16_t eob, ref_eob;
+  uint16_t ref_eob = 0;
+  eob = 0;
  
    for (int i = 0; i < number_of_iterations; ++i) {
      // Test skip block for the first three iterations to catch all the different
@@ -294,23 +315,21 @@ TEST_P(VP9QuantizeTest, OperationCheck) {
        sz = TX_32X32;
      }
      const TX_TYPE tx_type = static_cast<TX_TYPE>((i >> 2) % 3);
-    const scan_order *scan_order = &vp9_scan_orders[sz][tx_type];
-    const int count = (4 << sz) * (4 << sz);
+    scan = &vp9_scan_orders[sz][tx_type];
+    count = (4 << sz) * (4 << sz);
      coeff.Set(&rnd, -max_value_, max_value_);
      GenerateHelperArrays(&rnd, zbin_ptr_, round_ptr_, quant_ptr_,
                           quant_shift_ptr_, dequant_ptr_, round_fp_ptr_,
                           quant_fp_ptr_);
-    int16_t *r_ptr = (is_fp_) ? round_fp_ptr_ : round_ptr_;
-    int16_t *q_ptr = (is_fp_) ? quant_fp_ptr_ : quant_ptr_;
      ref_quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr,
                       q_ptr, quant_shift_ptr_, ref_qcoeff.TopLeftPixel(),
                       ref_dqcoeff.TopLeftPixel(), dequant_ptr_, &ref_eob,
-                     scan_order->scan, scan_order->iscan);
+                     scan->scan, scan->iscan);
  
      ASM_REGISTER_STATE_CHECK(quantize_op_(
          coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr, q_ptr,
          quant_shift_ptr_, qcoeff.TopLeftPixel(), dqcoeff.TopLeftPixel(),
-        dequant_ptr_, &eob, scan_order->scan, scan_order->iscan));
+        dequant_ptr_, &eob, scan->scan, scan->iscan));
  
      EXPECT_TRUE(qcoeff.CheckValues(ref_qcoeff));
      EXPECT_TRUE(dqcoeff.CheckValues(ref_dqcoeff));
@@ -328,11 +347,8 @@ TEST_P(VP9QuantizeTest, OperationCheck) {
  
  TEST_P(VP9QuantizeTest, EOBCheck) {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
-  Buffer<tran_low_t> coeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 16);
    ASSERT_TRUE(coeff.Init());
-  Buffer<tran_low_t> qcoeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
    ASSERT_TRUE(qcoeff.Init());
-  Buffer<tran_low_t> dqcoeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
    ASSERT_TRUE(dqcoeff.Init());
    Buffer<tran_low_t> ref_qcoeff =
        Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
@@ -340,10 +356,12 @@ TEST_P(VP9QuantizeTest, EOBCheck) {
    Buffer<tran_low_t> ref_dqcoeff =
        Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
    ASSERT_TRUE(ref_dqcoeff.Init());
-  uint16_t eob, ref_eob;
+  uint16_t ref_eob = 0;
+  eob = 0;
+  const uint32_t max_index = max_size_ * max_size_ - 1;
  
    for (int i = 0; i < number_of_iterations; ++i) {
-    const int skip_block = 0;
+    skip_block = 0;
      TX_SIZE sz;
      if (max_size_ == 16) {
        sz = static_cast<TX_SIZE>(i % 3);  // TX_4X4, TX_8X8 TX_16X16
@@ -351,28 +369,26 @@ TEST_P(VP9QuantizeTest, EOBCheck) {
        sz = TX_32X32;
      }
      const TX_TYPE tx_type = static_cast<TX_TYPE>((i >> 2) % 3);
-    const scan_order *scan_order = &vp9_scan_orders[sz][tx_type];
-    int count = (4 << sz) * (4 << sz);
+    scan = &vp9_scan_orders[sz][tx_type];
+    count = (4 << sz) * (4 << sz);
      // Two random entries
      coeff.Set(0);
-    coeff.TopLeftPixel()[rnd(count)] =
+    coeff.TopLeftPixel()[rnd.RandRange(count) & max_index] =
          static_cast<int>(rnd.RandRange(max_value_ * 2)) - max_value_;
-    coeff.TopLeftPixel()[rnd(count)] =
+    coeff.TopLeftPixel()[rnd.RandRange(count) & max_index] =
          static_cast<int>(rnd.RandRange(max_value_ * 2)) - max_value_;
      GenerateHelperArrays(&rnd, zbin_ptr_, round_ptr_, quant_ptr_,
                           quant_shift_ptr_, dequant_ptr_, round_fp_ptr_,
                           quant_fp_ptr_);
-    int16_t *r_ptr = (is_fp_) ? round_fp_ptr_ : round_ptr_;
-    int16_t *q_ptr = (is_fp_) ? quant_fp_ptr_ : quant_ptr_;
      ref_quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr,
                       q_ptr, quant_shift_ptr_, ref_qcoeff.TopLeftPixel(),
                       ref_dqcoeff.TopLeftPixel(), dequant_ptr_, &ref_eob,
-                     scan_order->scan, scan_order->iscan);
+                     scan->scan, scan->iscan);
  
      ASM_REGISTER_STATE_CHECK(quantize_op_(
          coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr, q_ptr,
          quant_shift_ptr_, qcoeff.TopLeftPixel(), dqcoeff.TopLeftPixel(),
-        dequant_ptr_, &eob, scan_order->scan, scan_order->iscan));
+        dequant_ptr_, &eob, scan->scan, scan->iscan));
  
      EXPECT_TRUE(qcoeff.CheckValues(ref_qcoeff));
      EXPECT_TRUE(dqcoeff.CheckValues(ref_dqcoeff));
@@ -390,13 +406,9 @@ TEST_P(VP9QuantizeTest, EOBCheck) {
  
  TEST_P(VP9QuantizeTest, DISABLED_Speed) {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
-  Buffer<tran_low_t> coeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 16);
    ASSERT_TRUE(coeff.Init());
-  Buffer<tran_low_t> qcoeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
    ASSERT_TRUE(qcoeff.Init());
-  Buffer<tran_low_t> dqcoeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
    ASSERT_TRUE(dqcoeff.Init());
-  uint16_t eob;
    TX_SIZE starting_sz, ending_sz;
  
    if (max_size_ == 16) {
@@ -410,18 +422,16 @@ TEST_P(VP9QuantizeTest, DISABLED_Speed) {
    for (TX_SIZE sz = starting_sz; sz <= ending_sz; ++sz) {
      // zbin > coeff, zbin < coeff.
      for (int i = 0; i < 2; ++i) {
-      const int skip_block = 0;
+      skip_block = 0;
        // TX_TYPE defines the scan order. That is not relevant to the speed test.
        // Pick the first one.
        const TX_TYPE tx_type = DCT_DCT;
-      const scan_order *scan_order = &vp9_scan_orders[sz][tx_type];
-      const int count = (4 << sz) * (4 << sz);
+      count = (4 << sz) * (4 << sz);
+      scan = &vp9_scan_orders[sz][tx_type];
  
        GenerateHelperArrays(&rnd, zbin_ptr_, round_ptr_, quant_ptr_,
                             quant_shift_ptr_, dequant_ptr_, round_fp_ptr_,
                             quant_fp_ptr_);
-      int16_t *r_ptr = (is_fp_) ? round_fp_ptr_ : round_ptr_;
-      int16_t *q_ptr = (is_fp_) ? quant_fp_ptr_ : quant_ptr_;
  
        if (i == 0) {
          // When |coeff values| are less than zbin the results are 0.
@@ -438,22 +448,15 @@ TEST_P(VP9QuantizeTest, DISABLED_Speed) {
          coeff.Set(&rnd, -500, 500);
        }
  
-      vpx_usec_timer timer;
-      vpx_usec_timer_start(&timer);
-      for (int j = 0; j < 100000000 / count; ++j) {
-        quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr,
-                     q_ptr, quant_shift_ptr_, qcoeff.TopLeftPixel(),
-                     dqcoeff.TopLeftPixel(), dequant_ptr_, &eob,
-                     scan_order->scan, scan_order->iscan);
-      }
-      vpx_usec_timer_mark(&timer);
-      const int elapsed_time = static_cast<int>(vpx_usec_timer_elapsed(&timer));
-      if (i == 0) printf("Bypass calculations.\n");
-      if (i == 1) printf("Full calculations.\n");
-      printf("Quantize %dx%d time: %5d ms\n", 4 << sz, 4 << sz,
-             elapsed_time / 1000);
+      runNTimes(10000000 / count);
+      const char *type =
+          (i == 0) ? "Bypass calculations " : "Full calculations ";
+      char block_size[16];
+      snprintf(block_size, sizeof(block_size), "%dx%d", 4 << sz, 4 << sz);
+      char title[100];
+      snprintf(title, sizeof(title), "%25s %8s ", type, block_size);
+      printMedian(title);
      }
-    printf("\n");
    }
  }
author	Luc Trudeau <luc@trud.ca>
	Tue, 22 May 2018 18:16:15 +0000 (14:16 -0400)
committer	Luc Trudeau <luc@trud.ca>
	Tue, 29 May 2018 13:04:47 +0000 (13:04 +0000)
test/bench.cc	[new file with mode: 0644]	patch \| blob
test/bench.h	[new file with mode: 0644]	patch \| blob
test/test.mk		patch \| blob \| history
test/vp9_quantize_test.cc		patch \| blob \| history