From: Angie Chiang <angiebird@google.com>
Date: Fri, 6 Nov 2015 19:15:54 +0000 (-0800)
Subject: Add vp10_inv_txfm2d
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=4fd0ba8f6f3af26dbffaa90afc711053ddcdad2c;p=libvpx

Add vp10_inv_txfm2d

Change-Id: Ib63062a52c688e65bae5eb0052ce69d73d96c9c5
---

diff --git a/test/test.mk b/test/test.mk
index face2ad67..277bb8455 100644
--- a/test/test.mk
+++ b/test/test.mk
@@ -174,6 +174,7 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP10) += vp10_txfm_test.h
 LIBVPX_TEST_SRCS-$(CONFIG_VP10) += vp10_fwd_txfm1d_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP10) += vp10_inv_txfm1d_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP10) += vp10_fwd_txfm2d_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP10) += vp10_inv_txfm2d_test.cc
 
 endif # CONFIG_SHARED
 
diff --git a/test/vp10_inv_txfm2d_test.cc b/test/vp10_inv_txfm2d_test.cc
new file mode 100644
index 000000000..603821ec9
--- /dev/null
+++ b/test/vp10_inv_txfm2d_test.cc
@@ -0,0 +1,115 @@
+/*
+ *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "test/acm_random.h"
+#include "test/vp10_txfm_test.h"
+#include "vp10/common/vp10_fwd_txfm2d.h"
+#include "vp10/common/vp10_fwd_txfm2d_cfg.h"
+#include "vp10/common/vp10_inv_txfm2d.h"
+#include "vp10/common/vp10_inv_txfm2d_cfg.h"
+
+using libvpx_test::ACMRandom;
+
+namespace {
+
+const int txfm_size_num = 4;  // number of entries in txfm_size_ls
+const int txfm_size_ls[4] = {4, 8, 16, 32};  // tested square block sizes
+const TXFM_2D_CFG fwd_txfm_cfg_ls[4][4] = {  // [size index][type index]
+    {fwd_txfm_2d_cfg_dct_dct_4, fwd_txfm_2d_cfg_dct_adst_4,
+     fwd_txfm_2d_cfg_adst_adst_4, fwd_txfm_2d_cfg_adst_dct_4},
+    {fwd_txfm_2d_cfg_dct_dct_8, fwd_txfm_2d_cfg_dct_adst_8,
+     fwd_txfm_2d_cfg_adst_adst_8, fwd_txfm_2d_cfg_adst_dct_8},
+    {fwd_txfm_2d_cfg_dct_dct_16, fwd_txfm_2d_cfg_dct_adst_16,
+     fwd_txfm_2d_cfg_adst_adst_16, fwd_txfm_2d_cfg_adst_dct_16},
+    {fwd_txfm_2d_cfg_dct_dct_32, fwd_txfm_2d_cfg_dct_adst_32,
+     fwd_txfm_2d_cfg_adst_adst_32, fwd_txfm_2d_cfg_adst_dct_32}};
+
+const TXFM_2D_CFG inv_txfm_cfg_ls[4][4] = {  // [size index][type index]
+    {inv_txfm_2d_cfg_dct_dct_4, inv_txfm_2d_cfg_dct_adst_4,
+     inv_txfm_2d_cfg_adst_adst_4, inv_txfm_2d_cfg_adst_dct_4},
+    {inv_txfm_2d_cfg_dct_dct_8, inv_txfm_2d_cfg_dct_adst_8,
+     inv_txfm_2d_cfg_adst_adst_8, inv_txfm_2d_cfg_adst_dct_8},
+    {inv_txfm_2d_cfg_dct_dct_16, inv_txfm_2d_cfg_dct_adst_16,
+     inv_txfm_2d_cfg_adst_adst_16, inv_txfm_2d_cfg_adst_dct_16},
+    {inv_txfm_2d_cfg_dct_dct_32, inv_txfm_2d_cfg_dct_adst_32,
+     inv_txfm_2d_cfg_adst_adst_32, inv_txfm_2d_cfg_adst_dct_32}};
+
+const Fwd_Txfm2d_Func fwd_txfm_func_ls[4] = {  // [size index]
+    vp10_fwd_txfm2d_4x4, vp10_fwd_txfm2d_8x8, vp10_fwd_txfm2d_16x16,
+    vp10_fwd_txfm2d_32x32};
+const Inv_Txfm2d_Func inv_txfm_func_ls[4] = {  // [size index]
+    vp10_inv_txfm2d_add_4x4, vp10_inv_txfm2d_add_8x8, vp10_inv_txfm2d_add_16x16,
+    vp10_inv_txfm2d_add_32x32};
+
+const int txfm_type_num = 4;  // transform types exercised per size
+
+TEST(vp10_inv_txfm2d, round_trip) {
+  for (int txfm_size_idx = 0; txfm_size_idx < txfm_size_num; ++txfm_size_idx) {
+    const int txfm_size = txfm_size_ls[txfm_size_idx];
+    const int sqr_txfm_size = txfm_size * txfm_size;
+    int16_t* input = new int16_t[sqr_txfm_size];  // source samples
+    uint16_t* ref_input = new uint16_t[sqr_txfm_size];  // reconstruction
+    int32_t* output = new int32_t[sqr_txfm_size];  // forward transform output
+
+    for (int txfm_type_idx = 0; txfm_type_idx < txfm_type_num;
+         ++txfm_type_idx) {
+      const TXFM_2D_CFG fwd_txfm_cfg =
+          fwd_txfm_cfg_ls[txfm_size_idx][txfm_type_idx];
+      const TXFM_2D_CFG inv_txfm_cfg =
+          inv_txfm_cfg_ls[txfm_size_idx][txfm_type_idx];
+      const Fwd_Txfm2d_Func fwd_txfm_func = fwd_txfm_func_ls[txfm_size_idx];
+      const Inv_Txfm2d_Func inv_txfm_func = inv_txfm_func_ls[txfm_size_idx];
+      const int count = 5000;  // round trips per (size, type) pair
+      double avg_abs_error = 0;
+      ACMRandom rnd(ACMRandom::DeterministicSeed());
+      for (int ci = 0; ci < count; ci++) {
+        for (int ni = 0; ni < sqr_txfm_size; ++ni) {
+          if (ci == 0) {  // first round trip uses the largest input value
+            int extreme_input = base - 1;  // base: not defined in this file
+            input[ni] = extreme_input;  // extreme case
+            ref_input[ni] = 0;  // inverse adds into this buffer
+          } else {
+            input[ni] = rnd.Rand16() % base;
+            ref_input[ni] = 0;  // inverse adds into this buffer
+          }
+        }
+
+        fwd_txfm_func(input, output, txfm_size, &fwd_txfm_cfg, bd);
+        inv_txfm_func(output, ref_input, txfm_size, &inv_txfm_cfg, bd);
+
+        for (int ni = 0; ni < sqr_txfm_size; ++ni) {
+          EXPECT_LE(abs(input[ni] - ref_input[ni]), 2);  // per-sample bound
+        }
+        avg_abs_error += compute_avg_abs_error<int16_t, uint16_t>(
+            input, ref_input, sqr_txfm_size);
+      }
+
+      avg_abs_error /= count;
+      // max_abs_avg_error is an upper bound on the value reported by:
+      // printf("txfm_size: %d accuracy_avg_abs_error: %f\n", txfm_size,
+      // avg_abs_error);
+      // TODO(angiebird): this upper bound is from adst_adst_8
+      const double max_abs_avg_error = 0.024;
+      EXPECT_LE(avg_abs_error, max_abs_avg_error);
+    }
+
+    delete[] input;
+    delete[] ref_input;
+    delete[] output;
+  }
+}
+
+}  // anonymous namespace
diff --git a/vp10/common/vp10_inv_txfm2d.c b/vp10/common/vp10_inv_txfm2d.c
new file mode 100644
index 000000000..c894a42b2
--- /dev/null
+++ b/vp10/common/vp10_inv_txfm2d.c
@@ -0,0 +1,98 @@
+/*
+ *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vp10/common/vp10_txfm.h"
+
+static INLINE void inv_txfm2d_add_c(const int32_t *input, int16_t *output,
+                                    int stride, const TXFM_2D_CFG *cfg,
+                                    int32_t *txfm_buf) {
+  const int txfm_size = cfg->txfm_size;
+  const int8_t *shift = cfg->shift;
+  const int8_t *stage_range_col = cfg->stage_range_col;
+  const int8_t *stage_range_row = cfg->stage_range_row;
+  const int8_t *cos_bit_col = cfg->cos_bit_col;
+  const int8_t *cos_bit_row = cfg->cos_bit_row;
+  const TxfmFunc txfm_func_col = cfg->txfm_func_col;
+  const TxfmFunc txfm_func_row = cfg->txfm_func_row;
+
+  // txfm_buf must hold txfm_size * txfm_size + 2 * txfm_size int32_t values:
+  // temp_in (txfm_size), temp_out (txfm_size), then buf (txfm_size^2).
+  int32_t *temp_in = txfm_buf;  // one column gathered from buf
+  int32_t *temp_out = temp_in + txfm_size;  // that column after txfm_func_col
+  int32_t *buf = temp_out + txfm_size;  // row-pass results, row-major
+  int32_t *buf_ptr = buf;
+  int i, j;
+
+  // Rows: run txfm_func_row on each row of input, writing results to buf.
+  for (i = 0; i < txfm_size; ++i) {
+    txfm_func_row(input, buf_ptr, cos_bit_row, stage_range_row);
+    round_shift_array(buf_ptr, txfm_size, -shift[0]);
+    input += txfm_size;
+    buf_ptr += txfm_size;
+  }
+
+  // Columns: transform each column of buf and add the result into output.
+  for (i = 0; i < txfm_size; ++i) {
+    for (j = 0; j < txfm_size; ++j)
+      temp_in[j] = buf[j * txfm_size + i];
+    txfm_func_col(temp_in, temp_out, cos_bit_col, stage_range_col);
+    round_shift_array(temp_out, txfm_size, -shift[1]);
+    for (j = 0; j < txfm_size; ++j)
+      output[j * stride + i] += temp_out[j];  // accumulate, not overwrite
+  }
+}
+
+void vp10_inv_txfm2d_add_4x4(const int32_t *input, uint16_t *output,
+                             const int stride, const TXFM_2D_CFG *cfg,
+                             const int bd) {
+  int txfm_buf[4 * 4 + 4 + 4];  // buf + temp_in + temp_out
+  // On entry output holds the prediction signal, which is non-negative and at
+  // most (1 << bd) - 1. Assuming bd < 16 - 1, every such value also fits in an
+  // int16_t, so the uint16_t* buffer is accessed as int16_t* while the inverse
+  // transform is added; clamp_block then gets the bounds 0 and (1 << bd) - 1.
+  inv_txfm2d_add_c(input, (int16_t *)output, stride, cfg, txfm_buf);
+  clamp_block((int16_t *)output, 4, stride, 0, (1 << bd) - 1);
+}
+
+void vp10_inv_txfm2d_add_8x8(const int32_t *input, uint16_t *output,
+                             const int stride, const TXFM_2D_CFG *cfg,
+                             const int bd) {
+  int txfm_buf[8 * 8 + 8 + 8];  // buf + temp_in + temp_out
+  // On entry output holds the prediction signal, which is non-negative and at
+  // most (1 << bd) - 1. Assuming bd < 16 - 1, every such value also fits in an
+  // int16_t, so the uint16_t* buffer is accessed as int16_t* while the inverse
+  // transform is added; clamp_block then gets the bounds 0 and (1 << bd) - 1.
+  inv_txfm2d_add_c(input, (int16_t *)output, stride, cfg, txfm_buf);
+  clamp_block((int16_t *)output, 8, stride, 0, (1 << bd) - 1);
+}
+
+void vp10_inv_txfm2d_add_16x16(const int32_t *input, uint16_t *output,
+                               const int stride, const TXFM_2D_CFG *cfg,
+                               const int bd) {
+  int txfm_buf[16 * 16 + 16 + 16];  // buf + temp_in + temp_out
+  // On entry output holds the prediction signal, which is non-negative and at
+  // most (1 << bd) - 1. Assuming bd < 16 - 1, every such value also fits in an
+  // int16_t, so the uint16_t* buffer is accessed as int16_t* while the inverse
+  // transform is added; clamp_block then gets the bounds 0 and (1 << bd) - 1.
+  inv_txfm2d_add_c(input, (int16_t *)output, stride, cfg, txfm_buf);
+  clamp_block((int16_t *)output, 16, stride, 0, (1 << bd) - 1);
+}
+
+void vp10_inv_txfm2d_add_32x32(const int32_t *input, uint16_t *output,
+                               const int stride, const TXFM_2D_CFG *cfg,
+                               const int bd) {
+  int txfm_buf[32 * 32 + 32 + 32];  // buf + temp_in + temp_out
+  // On entry output holds the prediction signal, which is non-negative and at
+  // most (1 << bd) - 1. Assuming bd < 16 - 1, every such value also fits in an
+  // int16_t, so the uint16_t* buffer is accessed as int16_t* while the inverse
+  // transform is added; clamp_block then gets the bounds 0 and (1 << bd) - 1.
+  inv_txfm2d_add_c(input, (int16_t *)output, stride, cfg, txfm_buf);
+  clamp_block((int16_t *)output, 32, stride, 0, (1 << bd) - 1);
+}
diff --git a/vp10/common/vp10_inv_txfm2d.h b/vp10/common/vp10_inv_txfm2d.h
new file mode 100644
index 000000000..1b570efcd
--- /dev/null
+++ b/vp10/common/vp10_inv_txfm2d.h
@@ -0,0 +1,33 @@
+/*
+ *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_INV_TXFM2D_C_H_
+#define VP10_INV_TXFM2D_C_H_
+
+#include "vp10/common/vp10_inv_txfm2d_cfg.h"
+#ifdef __cplusplus
+extern "C" {
+#endif  // __cplusplus
+void vp10_inv_txfm2d_add_4x4(const int32_t *input, uint16_t *output,
+                             const int stride, const TXFM_2D_CFG *cfg,
+                             const int bd);
+void vp10_inv_txfm2d_add_8x8(const int32_t *input, uint16_t *output,
+                             const int stride, const TXFM_2D_CFG *cfg,
+                             const int bd);
+void vp10_inv_txfm2d_add_16x16(const int32_t *input, uint16_t *output,
+                               const int stride, const TXFM_2D_CFG *cfg,
+                               const int bd);
+void vp10_inv_txfm2d_add_32x32(const int32_t *input, uint16_t *output,
+                               const int stride, const TXFM_2D_CFG *cfg,
+                               const int bd);
+#ifdef __cplusplus
+}
+#endif  // __cplusplus
+#endif  // VP10_INV_TXFM2D_C_H_
diff --git a/vp10/common/vp10_txfm.h b/vp10/common/vp10_txfm.h
index 427bccb10..b4fd75343 100644
--- a/vp10/common/vp10_txfm.h
+++ b/vp10/common/vp10_txfm.h
@@ -151,17 +151,17 @@ typedef void (*TxfmFunc)(const int32_t *input, int32_t *output,
                          const int8_t *cos_bit, const int8_t *stage_range);
 
 typedef struct TXFM_2D_CFG {
-  int txfm_size;
-  int stage_num_col;
-  int stage_num_row;
+  const int txfm_size;  // side length of the square block
+  const int stage_num_col;
+  const int stage_num_row;
 
-  int8_t *shift;
-  int8_t *stage_range_col;
-  int8_t *stage_range_row;
-  int8_t *cos_bit_col;
-  int8_t *cos_bit_row;
-  TxfmFunc txfm_func_col;
-  TxfmFunc txfm_func_row;
+  const int8_t *shift;
+  const int8_t *stage_range_col;  // handed to txfm_func_col
+  const int8_t *stage_range_row;  // handed to txfm_func_row
+  const int8_t *cos_bit_col;      // handed to txfm_func_col
+  const int8_t *cos_bit_row;      // handed to txfm_func_row
+  const TxfmFunc txfm_func_col;   // 1-D transform run on columns
+  const TxfmFunc txfm_func_row;   // 1-D transform run on rows
 } TXFM_2D_CFG;
 
 #endif  // VP10_TXFM_H_
diff --git a/vp10/vp10_common.mk b/vp10/vp10_common.mk
index 461815c91..f8c211279 100644
--- a/vp10/vp10_common.mk
+++ b/vp10/vp10_common.mk
@@ -71,6 +71,8 @@ VP10_COMMON_SRCS-yes += common/vp10_inv_txfm1d.c
 VP10_COMMON_SRCS-yes += common/vp10_fwd_txfm2d.h
 VP10_COMMON_SRCS-yes += common/vp10_fwd_txfm2d.c
 VP10_COMMON_SRCS-yes += common/vp10_fwd_txfm2d_cfg.h
+VP10_COMMON_SRCS-yes += common/vp10_inv_txfm2d.h
+VP10_COMMON_SRCS-yes += common/vp10_inv_txfm2d.c
 VP10_COMMON_SRCS-yes += common/vp10_inv_txfm2d_cfg.h
 
 VP10_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/postproc.h