Add fdct4

author Angie Chiang <angiebird@google.com>

Tue, 27 Oct 2015 23:16:30 +0000 (16:16 -0700)

committer Angie Chiang <angiebird@google.com>

Thu, 29 Oct 2015 21:05:25 +0000 (14:05 -0700)
author Angie Chiang <angiebird@google.com>
Tue, 27 Oct 2015 23:16:30 +0000 (16:16 -0700)
committer Angie Chiang <angiebird@google.com>
Thu, 29 Oct 2015 21:05:25 +0000 (14:05 -0700)
diff --git a/vp10/common/vp10_fwd_txfm1d.c b/vp10/common/vp10_fwd_txfm1d.c

new file mode 100644 (file)

index 0000000..ebc9fcd
--- /dev/null
+++ b/vp10/common/vp10_fwd_txfm1d.c
@@ -0,0 +1,83 @@
+/*
+ *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vp10/common/vp10_fwd_txfm1d.h"
+#if CONFIG_COEFFICIENT_RANGE_CHECKING
+#define range_check(stage, input, buf, size, bit)                         \
+  {                                                                       \
+    int i, j;                                                             \
+    for (i = 0; i < size; ++i) {                                          \
+      int buf_bit = get_max_bit(abs(buf[i])) + 1;                         \
+      if (buf_bit > bit) {                                                \
+        printf("======== %s overflow ========\n", __func__);              \
+        printf("stage: %d node: %d\n", stage, i);                         \
+        printf("bit: %d buf_bit: %d buf[i]: %d\n", bit, buf_bit, buf[i]); \
+        printf("input:\n");                                               \
+        for (j = 0; j < size; j++) {                                      \
+          printf("%d,", input[j]);                                        \
+        }                                                                 \
+        printf("\n");                                                     \
+        assert(0, "vp10_fwd_txfm1d.c: range_check overflow");             \
+      }                                                                   \
+    }                                                                     \
+  }
+#else
+#define range_check(stage, input, buf, size, bit) \
+  {                                               \
+    (void) stage;                                 \
+    (void) input;                                 \
+    (void) buf;                                   \
+    (void) size;                                  \
+    (void) bit;                                   \
+  }
+#endif
+
+// 4-point forward DCT (fdct4), evaluated as a short butterfly network.
+//
+// input:       4 source values; only read.
+// output:      4 results; also holds the stage-1 intermediate values.
+// cos_bit:     per-stage precision; cos_bit[2] selects the cospi_arr row and
+//              is the shift handed to half_btf().
+// stage_range: per-stage bit budgets, stage_range[0..3], given to
+//              range_check() after each stage.
+void vp10_fdct4_new(const int32_t *input, int32_t *output,
+                    const int8_t *cos_bit, const int8_t *stage_range) {
+  const int32_t size = 4;
+  const int32_t *cospi;
+  int32_t step[4];
+  int32_t stage = 0;
+  int shift;
+
+  // Stage 0: check the raw input.
+  range_check(stage, input, input, size, stage_range[stage]);
+
+  // Stage 1: mirrored sums and differences, written to output.
+  ++stage;
+  output[0] = input[0] + input[3];
+  output[1] = input[1] + input[2];
+  output[2] = input[1] - input[2];
+  output[3] = input[0] - input[3];
+  range_check(stage, input, output, size, stage_range[stage]);
+
+  // Stage 2: weighted butterflies from output into the local scratch array.
+  ++stage;
+  shift = cos_bit[stage];
+  cospi = cospi_arr[shift - cos_bit_min];
+  step[0] = half_btf(cospi[32], output[0], cospi[32], output[1], shift);
+  step[1] = half_btf(-cospi[32], output[1], cospi[32], output[0], shift);
+  step[2] = half_btf(cospi[48], output[2], cospi[16], output[3], shift);
+  step[3] = half_btf(cospi[48], output[3], -cospi[16], output[2], shift);
+  range_check(stage, input, step, size, stage_range[stage]);
+
+  // Stage 3: copy back to output, swapping the two middle entries.
+  ++stage;
+  output[0] = step[0];
+  output[1] = step[2];
+  output[2] = step[1];
+  output[3] = step[3];
+  range_check(stage, input, output, size, stage_range[stage]);
+}
diff --git a/vp10/common/vp10_fwd_txfm1d.h b/vp10/common/vp10_fwd_txfm1d.h

new file mode 100644 (file)

index 0000000..fe786a5
--- /dev/null
+++ b/vp10/common/vp10_fwd_txfm1d.h
@@ -0,0 +1,27 @@
+/*
+ *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_FWD_TXFM1D_H_
+#define VP10_FWD_TXFM1D_H_
+
+#include "vp10/common/vp10_txfm.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+/*
+ * 4-point forward 1-D transform. Reads input[0..3] and writes output[0..3];
+ * cos_bit and stage_range are arrays indexed by stage number.
+ */
+void vp10_fdct4_new(const int32_t *input, int32_t *output,
+                    const int8_t *cos_bit, const int8_t *stage_range);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // VP10_FWD_TXFM1D_H_
diff --git a/vp10/common/vp10_txfm.h b/vp10/common/vp10_txfm.h

new file mode 100644 (file)

index 0000000..427bccb
--- /dev/null
+++ b/vp10/common/vp10_txfm.h
@@ -0,0 +1,167 @@
+/*
+ *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_TXFM_H_
+#define VP10_TXFM_H_
+
+#include <stdio.h>
+#include <math.h>
+#include <assert.h>
+
+#include "vpx/vpx_integer.h"
+#include "vpx_dsp/vpx_dsp_common.h"
+
+static const int cos_bit_min = 10;  // precision of cospi_arr row 0
+static const int cos_bit_max = 16;  // precision of cospi_arr row 6 (last row)
+
+/*
+ * Fixed-point cosine table:
+ *   cospi_arr[i][j] = (int)round(cos(M_PI*j/128) * (1<<(cos_bit_min+i)));
+ * Row i holds the 64 values j = 0..63 scaled by 2^(cos_bit_min + i), so the
+ * row for a given precision is cospi_arr[cos_bit - cos_bit_min].
+ */
+static const int32_t cospi_arr[7][64] =
+  {{ 1024,  1024,  1023,  1021,  1019,  1016,  1013,  1009,
+     1004,   999,   993,   987,   980,   972,   964,   955,
+      946,   936,   926,   915,   903,   891,   878,   865,
+      851,   837,   822,   807,   792,   775,   759,   742,
+      724,   706,   688,   669,   650,   630,   610,   590,
+      569,   548,   526,   505,   483,   460,   438,   415,
+      392,   369,   345,   321,   297,   273,   249,   224,
+      200,   175,   150,   125,   100,    75,    50,    25},
+  {  2048,  2047,  2046,  2042,  2038,  2033,  2026,  2018,
+     2009,  1998,  1987,  1974,  1960,  1945,  1928,  1911,
+     1892,  1872,  1851,  1829,  1806,  1782,  1757,  1730,
+     1703,  1674,  1645,  1615,  1583,  1551,  1517,  1483,
+     1448,  1412,  1375,  1338,  1299,  1260,  1220,  1179,
+     1138,  1096,  1053,  1009,   965,   921,   876,   830,
+      784,   737,   690,   642,   595,   546,   498,   449,
+      400,   350,   301,   251,   201,   151,   100,    50},
+  {  4096,  4095,  4091,  4085,  4076,  4065,  4052,  4036,
+     4017,  3996,  3973,  3948,  3920,  3889,  3857,  3822,
+     3784,  3745,  3703,  3659,  3612,  3564,  3513,  3461,
+     3406,  3349,  3290,  3229,  3166,  3102,  3035,  2967,
+     2896,  2824,  2751,  2675,  2598,  2520,  2440,  2359,
+     2276,  2191,  2106,  2019,  1931,  1842,  1751,  1660,
+     1567,  1474,  1380,  1285,  1189,  1092,   995,   897,
+      799,   700,   601,   501,   401,   301,   201,   101},
+  {  8192,  8190,  8182,  8170,  8153,  8130,  8103,  8071,
+     8035,  7993,  7946,  7895,  7839,  7779,  7713,  7643,
+     7568,  7489,  7405,  7317,  7225,  7128,  7027,  6921,
+     6811,  6698,  6580,  6458,  6333,  6203,  6070,  5933,
+     5793,  5649,  5501,  5351,  5197,  5040,  4880,  4717,
+     4551,  4383,  4212,  4038,  3862,  3683,  3503,  3320,
+     3135,  2948,  2760,  2570,  2378,  2185,  1990,  1795,
+     1598,  1401,  1202,  1003,   803,   603,   402,   201},
+  { 16384, 16379, 16364, 16340, 16305, 16261, 16207, 16143,
+    16069, 15986, 15893, 15791, 15679, 15557, 15426, 15286,
+    15137, 14978, 14811, 14635, 14449, 14256, 14053, 13842,
+    13623, 13395, 13160, 12916, 12665, 12406, 12140, 11866,
+    11585, 11297, 11003, 10702, 10394, 10080,  9760,  9434,
+     9102,  8765,  8423,  8076,  7723,  7366,  7005,  6639,
+     6270,  5897,  5520,  5139,  4756,  4370,  3981,  3590,
+     3196,  2801,  2404,  2006,  1606,  1205,   804,   402},
+  { 32768, 32758, 32729, 32679, 32610, 32522, 32413, 32286,
+    32138, 31972, 31786, 31581, 31357, 31114, 30853, 30572,
+    30274, 29957, 29622, 29269, 28899, 28511, 28106, 27684,
+    27246, 26791, 26320, 25833, 25330, 24812, 24279, 23732,
+    23170, 22595, 22006, 21403, 20788, 20160, 19520, 18868,
+    18205, 17531, 16846, 16151, 15447, 14733, 14010, 13279,
+    12540, 11793, 11039, 10279,  9512,  8740,  7962,  7180,
+     6393,  5602,  4808,  4011,  3212,  2411,  1608,   804},
+  { 65536, 65516, 65457, 65358, 65220, 65043, 64827, 64571,
+    64277, 63944, 63572, 63162, 62714, 62228, 61705, 61145,
+    60547, 59914, 59244, 58538, 57798, 57022, 56212, 55368,
+    54491, 53581, 52639, 51665, 50660, 49624, 48559, 47464,
+    46341, 45190, 44011, 42806, 41576, 40320, 39040, 37736,
+    36410, 35062, 33692, 32303, 30893, 29466, 28020, 26558,
+    25080, 23586, 22078, 20557, 19024, 17479, 15924, 14359,
+    12785, 11204,  9616,  8022,  6424,  4821,  3216,  1608}};
+
+// Rounding shift of a signed fixed-point value.
+//
+// bit > 0:  returns value / 2^bit rounded to the nearest integer, with exact
+//           halves rounded toward zero (3 -> 1 and -3 -> -1 for bit == 1).
+//           For value >= 0 there are two common roundings,
+//             1) (value + (1 << (bit - 1)) - 1) >> bit
+//             2) (value + (1 << (bit - 1))) >> bit
+//           Both are nearly unbiased; version 1 is used because it rounds
+//           ties toward zero, and the negative branch mirrors it.
+// bit <= 0: returns value scaled up by 2^(-bit).
+static INLINE int32_t round_shift(int32_t value, int bit) {
+  if (bit > 0) {
+    const int32_t half = 1 << (bit - 1);
+    if (value >= 0)
+      return (value + half - 1) >> bit;
+    else
+      // Relies on >> of a negative value being an arithmetic shift.
+      return ((value - half) >> bit) + 1;
+  } else {
+    // Multiply rather than "value << -bit": left-shifting a negative value
+    // is undefined behavior in C.
+    return value * (1 << -bit);
+  }
+}
+
+// Applies round_shift(., bit) in place to arr[0..size-1].
+// A bit of 0 leaves the array untouched.
+static INLINE void round_shift_array(int32_t *arr, int size, int bit) {
+  int32_t *cur = arr;
+  int remaining;
+
+  if (bit == 0) return;
+
+  for (remaining = size; remaining > 0; --remaining) {
+    *cur = round_shift(*cur, bit);
+    ++cur;
+  }
+}
+
+// Half butterfly: returns round_shift(w0 * in0 + w1 * in1, bit).
+//
+// With CONFIG_COEFFICIENT_RANGE_CHECKING enabled the weighted sum is formed in
+// 64 bits; if it does not fit in 32 bits the operands are printed and the
+// function asserts.
+static INLINE int32_t half_btf(int32_t w0, int32_t in0, int32_t w1, int32_t in1,
+                               int bit) {
+#if CONFIG_COEFFICIENT_RANGE_CHECKING
+  const int64_t result_64 = (int64_t)w0 * (int64_t)in0 + (int64_t)w1 * (int64_t)in1;
+  // Narrow the exact sum instead of recomputing it in 32 bits, so the overflow
+  // test itself never performs an overflowing signed operation.
+  const int32_t result_32 = (int32_t)result_64;
+  if (result_32 != result_64) {
+    // int64_t is not "long" on every platform; print it as long long.
+    printf(
+        "%s overflow result_32: %d result_64: %lld w0: %d in0: %d w1: %d in1: "
+        "%d\n",
+        __func__, result_32, (long long)result_64, w0, in0, w1, in1);
+    assert(0 && "half_btf overflow");
+  }
+#else
+  const int32_t result_32 = w0 * in0 + w1 * in1;
+#endif
+  return round_shift(result_32, bit);
+}
+
+// Returns the 0-based index of the highest set bit of x, or -1 when x is 0.
+// NOTE(review): written for non-negative x (range_check passes abs() values);
+// with an arithmetic >> a negative x would never reach 0.
+static INLINE int get_max_bit(int x) {
+  int msb;
+  for (msb = -1; x != 0; x >>= 1) {
+    ++msb;
+  }
+  return msb;
+}
+
+// TODO(angiebird): implement SSE
+// Passes every sample of a block_size x block_size block through
+// clamp(sample, low, high), in place. Consecutive rows are `stride` elements
+// apart.
+static INLINE void clamp_block(int16_t *block, int block_size, int stride,
+                               int low, int high) {
+  int row, col;
+  for (row = 0; row < block_size; ++row) {
+    int16_t *const line = block + row * stride;
+    for (col = 0; col < block_size; ++col) {
+      line[col] = clamp(line[col], low, high);
+    }
+  }
+}
+// Pointer to a transform routine with the same signature as vp10_fdct4_new.
+typedef void (*TxfmFunc)(const int32_t *input, int32_t *output,
+                         const int8_t *cos_bit, const int8_t *stage_range);
+/*
+ * NOTE(review): judging by the names, this bundles the settings of a 2-D
+ * transform built from a column pass and a row pass. Nothing in this header
+ * reads the struct, so confirm field meanings against its users.
+ */
+typedef struct TXFM_2D_CFG {
+  int txfm_size;  // NOTE(review): presumably the transform length - confirm
+  int stage_num_col;  // NOTE(review): presumably stage count, column pass
+  int stage_num_row;  // NOTE(review): presumably stage count, row pass
+
+  int8_t *shift;  // NOTE(review): presumably rounding shifts - confirm
+  int8_t *stage_range_col;  // same element type as TxfmFunc's stage_range
+  int8_t *stage_range_row;  // same element type as TxfmFunc's stage_range
+  int8_t *cos_bit_col;  // same element type as TxfmFunc's cos_bit
+  int8_t *cos_bit_row;  // same element type as TxfmFunc's cos_bit
+  TxfmFunc txfm_func_col;  // transform routine for the column pass (by name)
+  TxfmFunc txfm_func_row;  // transform routine for the row pass (by name)
+} TXFM_2D_CFG;
+
+#endif  // VP10_TXFM_H_
diff --git a/vp10/vp10_common.mk b/vp10/vp10_common.mk

index 2eb348873b9ff0afd83af8dac5f82099f439744a..05bdb70d5707e2a006ce7c010c27c33714d627f1 100644 (file)
--- a/vp10/vp10_common.mk
+++ b/vp10/vp10_common.mk
@@ -63,6 +63,9 @@ VP10_COMMON_SRCS-yes += common/scan.c
  VP10_COMMON_SRCS-yes += common/scan.h
  VP10_COMMON_SRCS-yes += common/vp10_fwd_txfm.h
  VP10_COMMON_SRCS-yes += common/vp10_fwd_txfm.c
+VP10_COMMON_SRCS-yes += common/vp10_txfm.h
+VP10_COMMON_SRCS-yes += common/vp10_fwd_txfm1d.h
+VP10_COMMON_SRCS-yes += common/vp10_fwd_txfm1d.c
  
  VP10_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/postproc.h
  VP10_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/postproc.c
author	Angie Chiang <angiebird@google.com>
	Tue, 27 Oct 2015 23:16:30 +0000 (16:16 -0700)
committer	Angie Chiang <angiebird@google.com>
	Thu, 29 Oct 2015 21:05:25 +0000 (14:05 -0700)
vp10/common/vp10_fwd_txfm1d.c	[new file with mode: 0644]	patch \| blob
vp10/common/vp10_fwd_txfm1d.h	[new file with mode: 0644]	patch \| blob
vp10/common/vp10_txfm.h	[new file with mode: 0644]	patch \| blob
vp10/vp10_common.mk		patch \| blob \| history