Extends ext-tx to support 32x32 masked transforms

author Debargha Mukherjee <debargha@google.com>

Sat, 13 Feb 2016 00:44:33 +0000 (16:44 -0800)

committer Debargha Mukherjee <debargha@google.com>

Wed, 17 Feb 2016 17:31:34 +0000 (09:31 -0800)
author Debargha Mukherjee <debargha@google.com>
Sat, 13 Feb 2016 00:44:33 +0000 (16:44 -0800)
committer Debargha Mukherjee <debargha@google.com>
Wed, 17 Feb 2016 17:31:34 +0000 (09:31 -0800)
diff --git a/vp10/common/idct.c b/vp10/common/idct.c

index 6f38f74a75ab8be4b53ce5c723f4735dcc99260e..dbb50fbbace6b9ed27d2f6fc820348621cc1f097 100644 (file)
--- a/vp10/common/idct.c
+++ b/vp10/common/idct.c
@@ -259,6 +259,73 @@ void idst16_c(const tran_low_t *input, tran_low_t *output) {
    output[15] = WRAPLOW(-step2[0] + step2[15], 8);
  }
  
+#if CONFIG_EXT_TX
+// In lieu of inverse DST: IDCT16 into output[8..23], outer samples copied x4
+static void ihalfcenter32_c(const tran_low_t *input, tran_low_t *output) {
+  int i;
+  tran_low_t inputhalf[16];
+  // Scale input[0..15] by sqrt(2) first: input may alias output (in-place use)
+  for (i = 0; i < 16; ++i) {
+    inputhalf[i] = (tran_low_t)dct_const_round_shift(input[i] * Sqrt2);
+  }
+  for (i = 0; i < 8; ++i) {
+    output[i] = input[16 + i] * 4;
+    output[24 + i] = input[24 + i] * 4;
+  }
+  idct16_c(inputhalf, output + 8);
+  // Note overall scaling factor is 4 times orthogonal
+}
+
+static void ihalfright32_c(const tran_low_t *input, tran_low_t *output) {
+  int i;
+  tran_low_t inputhalf[16];
+  // Scale input[0..15] by sqrt(2) first: input may alias output (in-place use)
+  for (i = 0; i < 16; ++i) {
+    inputhalf[i] = (tran_low_t)dct_const_round_shift(input[i] * Sqrt2);
+  }
+  for (i = 0; i < 16; ++i) {
+    output[i] = input[16 + i] * 4;
+  }
+  idct16_c(inputhalf, output + 16);
+  // Note overall scaling factor is 4 times orthogonal
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+static void highbd_ihalfcenter32_c(const tran_low_t *input, tran_low_t *output,
+                                   int bd) {
+  int i;
+  tran_low_t inputhalf[16];
+  // Scale input[0..15] by sqrt(2) first: input may alias output (in-place use)
+  for (i = 0; i < 16; ++i) {
+    inputhalf[i] = (tran_low_t)highbd_dct_const_round_shift(
+        input[i] * Sqrt2, bd);
+  }
+  for (i = 0; i < 8; ++i) {
+    output[i] = input[16 + i] * 4;
+    output[24 + i] = input[24 + i] * 4;
+  }
+  vpx_highbd_idct16_c(inputhalf, output + 8, bd);
+  // Note overall scaling factor is 4 times orthogonal
+}
+
+static void highbd_ihalfright32_c(const tran_low_t *input, tran_low_t *output,
+                                  int bd) {
+  int i;
+  tran_low_t inputhalf[16];
+  // Scale input[0..15] by sqrt(2) first: input may alias output (in-place use)
+  for (i = 0; i < 16; ++i) {
+    inputhalf[i] = (tran_low_t)highbd_dct_const_round_shift(
+        input[i] * Sqrt2, bd);
+  }
+  for (i = 0; i < 16; ++i) {
+    output[i] = input[16 + i] * 4;
+  }
+  vpx_highbd_idct16_c(inputhalf, output + 16, bd);
+  // Note overall scaling factor is 4 times orthogonal
+}
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif  // CONFIG_EXT_TX
+
  // Inverse identiy transform and add.
  static void inv_idtx_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                             int bs) {
@@ -808,6 +875,67 @@ void vp10_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
    }
  }
  
+#if CONFIG_EXT_TX
+void vp10_iht32x32_1024_add_c(const tran_low_t *input, uint8_t *dest,
+                              int stride, int tx_type) {
+  static const transform_2d IHT_32[] = {
+    { idct32_c,  idct32_c  },                // DCT_DCT           = 0,
+    { ihalfright32_c, idct32_c  },           // ADST_DCT          = 1,
+    { idct32_c,  ihalfright32_c },           // DCT_ADST          = 2,
+    { ihalfright32_c, ihalfright32_c },      // ADST_ADST         = 3,
+    { ihalfright32_c, idct32_c  },           // FLIPADST_DCT      = 4,
+    { idct32_c,  ihalfright32_c },           // DCT_FLIPADST      = 5,
+    { ihalfright32_c, ihalfright32_c },      // FLIPADST_FLIPADST = 6,
+    { ihalfright32_c, ihalfright32_c },      // ADST_FLIPADST     = 7,
+    { ihalfright32_c, ihalfright32_c },      // FLIPADST_ADST     = 8,
+    { ihalfcenter32_c,  idct32_c  },         // DST_DCT           = 9,
+    { idct32_c,  ihalfcenter32_c  },         // DCT_DST           = 10,
+    { ihalfcenter32_c,  ihalfright32_c },    // DST_ADST          = 11,
+    { ihalfright32_c, ihalfcenter32_c  },    // ADST_DST          = 12,
+    { ihalfcenter32_c,  ihalfright32_c },    // DST_FLIPADST      = 13,
+    { ihalfright32_c, ihalfcenter32_c  },    // FLIPADST_DST      = 14,
+    { ihalfcenter32_c,  ihalfcenter32_c  },  // DST_DST           = 15
+  };
+
+  int i, j;
+  tran_low_t tmp;
+  tran_low_t out[32][32];
+  tran_low_t *outp = &out[0][0];
+  int outstride = 32;
+
+  // Row pass: inverse transform each 32-coefficient row of input into out[i]
+  for (i = 0; i < 32; ++i) {
+    IHT_32[tx_type].rows(input, out[i]);
+    input  += 32;
+  }
+
+  // Transpose out[][] in place so that out[i] holds column i
+  for (i = 1 ; i < 32; i++) {
+    for (j = 0; j < i; j++) {
+            tmp = out[i][j];
+      out[i][j] = out[j][i];
+      out[j][i] = tmp;
+    }
+  }
+
+  // Column pass, run in place: input and output are both out[i]
+  for (i = 0; i < 32; ++i) {
+    IHT_32[tx_type].cols(out[i], out[i]);
+  }
+
+  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 32);
+
+  // Sum with the destination; outp is indexed transposed relative to dest
+  for (i = 0; i < 32; ++i) {
+    for (j = 0; j < 32; ++j) {
+      int d = i * stride + j;
+      int s = j * outstride + i;
+      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
+    }
+  }
+}
+#endif  // CONFIG_EXT_TX
+
  // idct
  void vp10_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
                       int eob) {
@@ -998,15 +1126,27 @@ void vp10_inv_txfm_add_32x32(const tran_low_t *input, uint8_t *dest,
        vp10_idct32x32_add(input, dest, stride, eob);
        break;
  #if CONFIG_EXT_TX
-    case IDTX:
-      inv_idtx_add_c(input, dest, stride, 32);
-      break;
-#endif  // CONFIG_EXT_TX
      case ADST_DCT:
      case DCT_ADST:
      case ADST_ADST:
-      assert(0);
+    case FLIPADST_DCT:
+    case DCT_FLIPADST:
+    case FLIPADST_FLIPADST:
+    case ADST_FLIPADST:
+    case FLIPADST_ADST:
+    case DST_DST:
+    case DST_DCT:
+    case DCT_DST:
+    case DST_ADST:
+    case ADST_DST:
+    case FLIPADST_DST:
+    case DST_FLIPADST:
+      vp10_iht32x32_1024_add_c(input, dest, stride, tx_type);
        break;
+    case IDTX:
+      inv_idtx_add_c(input, dest, stride, 32);
+      break;
+#endif  // CONFIG_EXT_TX
      default:
        assert(0);
        break;
@@ -1212,6 +1352,70 @@ void vp10_highbd_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest8,
    }
  }
  
+#if CONFIG_EXT_TX
+void vp10_highbd_iht32x32_1024_add_c(const tran_low_t *input, uint8_t *dest8,
+                                     int stride, int tx_type, int bd) {
+  static const highbd_transform_2d HIGH_IHT_32[] = {
+    { vpx_highbd_idct32_c, vpx_highbd_idct32_c  },        // DCT_DCT
+    { highbd_ihalfright32_c, vpx_highbd_idct32_c  },      // ADST_DCT
+    { vpx_highbd_idct32_c, highbd_ihalfright32_c },       // DCT_ADST
+    { highbd_ihalfright32_c, highbd_ihalfright32_c },     // ADST_ADST
+    { highbd_ihalfright32_c, vpx_highbd_idct32_c  },      // FLIPADST_DCT
+    { vpx_highbd_idct32_c, highbd_ihalfright32_c },       // DCT_FLIPADST
+    { highbd_ihalfright32_c, highbd_ihalfright32_c },     // FLIPADST_FLIPADST
+    { highbd_ihalfright32_c, highbd_ihalfright32_c },     // ADST_FLIPADST
+    { highbd_ihalfright32_c, highbd_ihalfright32_c },     // FLIPADST_ADST
+    { highbd_ihalfcenter32_c, vpx_highbd_idct32_c  },     // DST_DCT
+    { vpx_highbd_idct32_c, highbd_ihalfcenter32_c  },     // DCT_DST
+    { highbd_ihalfcenter32_c, highbd_ihalfright32_c },    // DST_ADST
+    { highbd_ihalfright32_c, highbd_ihalfcenter32_c  },   // ADST_DST
+    { highbd_ihalfcenter32_c, highbd_ihalfright32_c },    // DST_FLIPADST
+    { highbd_ihalfright32_c, highbd_ihalfcenter32_c  },   // FLIPADST_DST
+    { highbd_ihalfcenter32_c, highbd_ihalfcenter32_c  },  // DST_DST
+  };
+
+  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+
+  int i, j;
+  tran_low_t tmp;
+  tran_low_t out[32][32];
+  tran_low_t *outp = &out[0][0];
+  int outstride = 32;
+
+  // Row pass: inverse transform each 32-coefficient row of input into out[i]
+  for (i = 0; i < 32; ++i) {
+    HIGH_IHT_32[tx_type].rows(input, out[i], bd);
+    input  += 32;
+  }
+
+  // Transpose out[][] in place so that out[i] holds column i
+  for (i = 1 ; i < 32; i++) {
+    for (j = 0; j < i; j++) {
+            tmp = out[i][j];
+      out[i][j] = out[j][i];
+      out[j][i] = tmp;
+    }
+  }
+
+  // Column pass, run in place: input and output are both out[i]
+  for (i = 0; i < 32; ++i) {
+    HIGH_IHT_32[tx_type].cols(out[i], out[i], bd);
+  }
+
+  maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, 32);
+
+  // Sum with the destination; outp is indexed transposed relative to dest
+  for (i = 0; i < 32; ++i) {
+    for (j = 0; j < 32; ++j) {
+      int d = i * stride + j;
+      int s = j * outstride + i;
+      dest[d] = highbd_clip_pixel_add(dest[d],
+                                      ROUND_POWER_OF_TWO(outp[s], 6), bd);
+    }
+  }
+}
+#endif  // CONFIG_EXT_TX
+
  // idct
  void vp10_highbd_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
                              int eob, int bd) {
@@ -1409,15 +1613,27 @@ void vp10_highbd_inv_txfm_add_32x32(const tran_low_t *input, uint8_t *dest,
        vp10_highbd_idct32x32_add(input, dest, stride, eob, bd);
        break;
  #if CONFIG_EXT_TX
-    case IDTX:
-      highbd_inv_idtx_add_c(input, dest, stride, 32, bd);
-      break;
-#endif  // CONFIG_EXT_TX
      case ADST_DCT:
      case DCT_ADST:
      case ADST_ADST:
-      assert(0);
+    case FLIPADST_DCT:
+    case DCT_FLIPADST:
+    case FLIPADST_FLIPADST:
+    case ADST_FLIPADST:
+    case FLIPADST_ADST:
+    case DST_DST:
+    case DST_DCT:
+    case DCT_DST:
+    case DST_ADST:
+    case ADST_DST:
+    case FLIPADST_DST:
+    case DST_FLIPADST:
+      vp10_highbd_iht32x32_1024_add_c(input, dest, stride, tx_type, bd);
        break;
+    case IDTX:
+      highbd_inv_idtx_add_c(input, dest, stride, 32, bd);
+      break;
+#endif  // CONFIG_EXT_TX
      default:
        assert(0);
        break;
diff --git a/vp10/common/vp10_rtcd_defs.pl b/vp10/common/vp10_rtcd_defs.pl

index 9860baedfe8aa16ad5c433ed8700ec656aa812da..c9f02953fd6a2bcf65667dad76f407b0fee2660e 100644 (file)
--- a/vp10/common/vp10_rtcd_defs.pl
+++ b/vp10/common/vp10_rtcd_defs.pl
@@ -404,6 +404,9 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
    add_proto qw/void vp10_fht16x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
    specialize qw/vp10_fht16x16 sse2/;
  
+  add_proto qw/void vp10_fht32x32/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+  specialize qw/vp10_fht32x32/;
+
    add_proto qw/void vp10_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/vp10_fwht4x4/, "$mmx_x86inc";
  } else {
@@ -416,6 +419,9 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
    add_proto qw/void vp10_fht16x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
    specialize qw/vp10_fht16x16 sse2 msa/;
  
+  add_proto qw/void vp10_fht32x32/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+  specialize qw/vp10_fht32x32/;
+
    add_proto qw/void vp10_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/vp10_fwht4x4 msa/, "$mmx_x86inc";
  }
@@ -642,6 +648,9 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
    add_proto qw/void vp10_highbd_fht16x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
    specialize qw/vp10_highbd_fht16x16/;
  
+  add_proto qw/void vp10_highbd_fht32x32/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+  specialize qw/vp10_highbd_fht32x32/;
+
    add_proto qw/void vp10_highbd_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/vp10_highbd_fwht4x4/;
  
diff --git a/vp10/encoder/dct.c b/vp10/encoder/dct.c

index cdb732a44c5fe17d29074d7159d8fe7fc5fd2206..333adbbcb85cab90f791e721a6743c6c7ebb4da3 100644 (file)
--- a/vp10/encoder/dct.c
+++ b/vp10/encoder/dct.c
@@ -14,7 +14,6 @@
  #include "./vp10_rtcd.h"
  #include "./vpx_config.h"
  #include "./vpx_dsp_rtcd.h"
-
  #include "vp10/common/blockd.h"
  #include "vp10/common/idct.h"
  #include "vpx_dsp/fwd_txfm.h"
@@ -538,7 +537,7 @@ static void fdct16(const tran_low_t *input, tran_low_t *output) {
    range_check(output, 16, 16);
  }
  
-/* TODO(angiebird): Unify this with vp10_fwd_txfm.c: vp10_fdct32
+#if CONFIG_EXT_TX
  static void fdct32(const tran_low_t *input, tran_low_t *output) {
    tran_high_t temp;
    tran_low_t step[32];
@@ -936,7 +935,7 @@ static void fdct32(const tran_low_t *input, tran_low_t *output) {
  
    range_check(output, 32, 18);
  }
-*/
+#endif  // CONFIG_EXT_TX
  
  static void fadst4(const tran_low_t *input, tran_low_t *output) {
    tran_high_t x0, x1, x2, x3;
@@ -1213,6 +1212,37 @@ static void fadst16(const tran_low_t *input, tran_low_t *output) {
  }
  
  #if CONFIG_EXT_TX
+// In lieu of DST: DCT16 of input[8..23]; the outer 16 samples pass through x4
+static void fhalfcenter32(const tran_low_t *input, tran_low_t *output) {
+  int i;
+  tran_low_t inputhalf[16];
+  for (i = 0; i < 8; ++i) {
+    output[16 + i] = input[i] * 4;
+    output[24 + i] = input[24 + i] * 4;
+  }
+  // Multiply the centre 16 inputs by sqrt(2) (Sqrt2 is 16384 * sqrt(2))
+  for (i = 0; i < 16; ++i) {
+    inputhalf[i] = (tran_low_t)fdct_round_shift(input[i + 8] * Sqrt2);
+  }
+  fdct16(inputhalf, output);
+  // Note overall scaling factor is 4 times orthogonal
+}
+
+// In lieu of ADST: DCT16 of input[16..31]; input[0..15] pass through x4
+static void fhalfright32(const tran_low_t *input, tran_low_t *output) {
+  int i;
+  tran_low_t inputhalf[16];
+  for (i = 0; i < 16; ++i) {
+    output[16 + i] = input[i] * 4;
+  }
+  // Multiply the right 16 inputs by sqrt(2) (Sqrt2 is 16384 * sqrt(2))
+  for (i = 0; i < 16; ++i) {
+    inputhalf[i] = (tran_low_t)fdct_round_shift(input[i + 16] * Sqrt2);
+  }
+  fdct16(inputhalf, output);
+  // Note overall scaling factor is 4 times orthogonal
+}
+
  static void copy_block(const int16_t *src, int src_stride, int l,
                         int16_t *dest, int dest_stride) {
    int i;
@@ -1375,6 +1405,27 @@ static const transform_2d FHT_16[] = {
  #endif  // CONFIG_EXT_TX
  };
  
+#if CONFIG_EXT_TX
+static const transform_2d FHT_32[] = {
+  { fdct32,  fdct32  },                // DCT_DCT           = 0,
+  { fhalfright32, fdct32  },           // ADST_DCT          = 1,
+  { fdct32,  fhalfright32 },           // DCT_ADST          = 2,
+  { fhalfright32, fhalfright32 },      // ADST_ADST         = 3,
+  { fhalfright32, fdct32  },           // FLIPADST_DCT      = 4,
+  { fdct32,  fhalfright32 },           // DCT_FLIPADST      = 5,
+  { fhalfright32, fhalfright32 },      // FLIPADST_FLIPADST = 6,
+  { fhalfright32, fhalfright32 },      // ADST_FLIPADST     = 7,
+  { fhalfright32, fhalfright32 },      // FLIPADST_ADST     = 8,
+  { fhalfcenter32,  fdct32  },         // DST_DCT           = 9,
+  { fdct32,  fhalfcenter32  },         // DCT_DST           = 10,
+  { fhalfcenter32,  fhalfright32 },    // DST_ADST          = 11,
+  { fhalfright32, fhalfcenter32  },    // ADST_DST          = 12,
+  { fhalfcenter32,  fhalfright32 },    // DST_FLIPADST      = 13,
+  { fhalfright32, fhalfcenter32  },    // FLIPADST_DST      = 14,
+  { fhalfcenter32,  fhalfcenter32  },  // DST_DST           = 15
+};
+#endif  // CONFIG_EXT_TX
+
  void vp10_fht4x4_c(const int16_t *input, tran_low_t *output,
                     int stride, int tx_type) {
    if (tx_type == DCT_DCT) {
@@ -1671,3 +1722,46 @@ void vp10_highbd_fht16x16_c(const int16_t *input, tran_low_t *output,
    vp10_fht16x16_c(input, output, stride, tx_type);
  }
  #endif  // CONFIG_VP9_HIGHBITDEPTH
+
+#if CONFIG_EXT_TX
+void vp10_fht32x32_c(const int16_t *input, tran_low_t *output,
+                     int stride, int tx_type) {
+  if (tx_type == DCT_DCT) {
+    vpx_fdct32x32_c(input, output, stride);
+  } else {
+    tran_low_t out[1024];
+    int i, j;
+    tran_low_t temp_in[32], temp_out[32];
+    const transform_2d ht = FHT_32[tx_type];
+
+    int16_t flipped_input[32 * 32];
+    maybe_flip_input(&input, &stride, 32, flipped_input, tx_type);
+
+    // Columns: inputs are pre-scaled by 4; results are rounded and shifted by 2
+    for (i = 0; i < 32; ++i) {
+      for (j = 0; j < 32; ++j)
+        temp_in[j] = input[j * stride + i] * 4;
+      ht.cols(temp_in, temp_out);
+      for (j = 0; j < 32; ++j)
+        out[j * 32 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2;
+    }
+
+    // Rows: transform each row of out; results are rounded and shifted by 2
+    for (i = 0; i < 32; ++i) {
+      for (j = 0; j < 32; ++j)
+        temp_in[j] = out[j + i * 32];
+      ht.rows(temp_in, temp_out);
+      for (j = 0; j < 32; ++j)
+        output[j + i * 32] =
+            (tran_low_t)((temp_out[j] + 1 + (temp_out[j] < 0)) >> 2);
+    }
+  }
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+void vp10_highbd_fht32x32_c(const int16_t *input, tran_low_t *output,
+                            int stride, int tx_type) {
+  vp10_fht32x32_c(input, output, stride, tx_type);
+}
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif  // CONFIG_EXT_TX
diff --git a/vpx_dsp/inv_txfm.c b/vpx_dsp/inv_txfm.c

index a0f59bf75713e6775f00a14a47b3bc11e207ca3a..402fd9a236d28f4f7946918f45e6b3f1070104c2 100644 (file)
--- a/vpx_dsp/inv_txfm.c
+++ b/vpx_dsp/inv_txfm.c
@@ -2057,8 +2057,8 @@ void vpx_highbd_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest8,
    }
  }
  
-static void highbd_idct32_c(const tran_low_t *input,
-                            tran_low_t *output, int bd) {
+void vpx_highbd_idct32_c(const tran_low_t *input,
+                         tran_low_t *output, int bd) {
    tran_low_t step1[32], step2[32];
    tran_high_t temp1, temp2;
    (void) bd;
@@ -2447,7 +2447,7 @@ void vpx_highbd_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest8,
        zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1];
  
      if (zero_coeff[0] | zero_coeff[1])
-      highbd_idct32_c(input, outptr, bd);
+      vpx_highbd_idct32_c(input, outptr, bd);
      else
        memset(outptr, 0, sizeof(tran_low_t) * 32);
      input += 32;
@@ -2458,7 +2458,7 @@ void vpx_highbd_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest8,
    for (i = 0; i < 32; ++i) {
      for (j = 0; j < 32; ++j)
        temp_in[j] = out[j * 32 + i];
-    highbd_idct32_c(temp_in, temp_out, bd);
+    vpx_highbd_idct32_c(temp_in, temp_out, bd);
      for (j = 0; j < 32; ++j) {
        dest[j * stride + i] = highbd_clip_pixel_add(
            dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd);
@@ -2477,7 +2477,7 @@ void vpx_highbd_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest8,
    // Rows
    // Only upper-left 8x8 has non-zero coeff.
    for (i = 0; i < 8; ++i) {
-    highbd_idct32_c(input, outptr, bd);
+    vpx_highbd_idct32_c(input, outptr, bd);
      input += 32;
      outptr += 32;
    }
@@ -2485,7 +2485,7 @@ void vpx_highbd_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest8,
    for (i = 0; i < 32; ++i) {
      for (j = 0; j < 32; ++j)
        temp_in[j] = out[j * 32 + i];
-    highbd_idct32_c(temp_in, temp_out, bd);
+    vpx_highbd_idct32_c(temp_in, temp_out, bd);
      for (j = 0; j < 32; ++j) {
        dest[j * stride + i] = highbd_clip_pixel_add(
            dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd);
diff --git a/vpx_dsp/inv_txfm.h b/vpx_dsp/inv_txfm.h

index 23588139edd6dc1d616c7f071070f99f5d629977..adbb83872b6ee0126605150c305d34f10f11eaff 100644 (file)
--- a/vpx_dsp/inv_txfm.h
+++ b/vpx_dsp/inv_txfm.h
@@ -100,6 +100,7 @@ void iadst16_c(const tran_low_t *input, tran_low_t *output);
  void vpx_highbd_idct4_c(const tran_low_t *input, tran_low_t *output, int bd);
  void vpx_highbd_idct8_c(const tran_low_t *input, tran_low_t *output, int bd);
  void vpx_highbd_idct16_c(const tran_low_t *input, tran_low_t *output, int bd);
+void vpx_highbd_idct32_c(const tran_low_t *input, tran_low_t *output, int bd);
  
  void vpx_highbd_iadst4_c(const tran_low_t *input, tran_low_t *output, int bd);
  void vpx_highbd_iadst8_c(const tran_low_t *input, tran_low_t *output, int bd);
diff --git a/vpx_dsp/txfm_common.h b/vpx_dsp/txfm_common.h

index 442e6a57b5ba5d84a7435674c7ff7f4217b3a0ce..9b0e9900a88f9b062d3b9e0c3fcefe5471728ff8 100644 (file)
--- a/vpx_dsp/txfm_common.h
+++ b/vpx_dsp/txfm_common.h
@@ -57,10 +57,13 @@ static const tran_high_t cospi_29_64 = 2404;
  static const tran_high_t cospi_30_64 = 1606;
  static const tran_high_t cospi_31_64 = 804;
  
-//  16384 * sqrt(2) * sin(kPi/9) * 2 / 3
+// 16384 * sqrt(2) * sin(kPi/9) * 2 / 3
  static const tran_high_t sinpi_1_9 = 5283;
  static const tran_high_t sinpi_2_9 = 9929;
  static const tran_high_t sinpi_3_9 = 13377;
  static const tran_high_t sinpi_4_9 = 15212;
  
+// 16384 * sqrt(2)
+static const tran_high_t Sqrt2 = 23170;
+
  #endif  // VPX_DSP_TXFM_COMMON_H_
author	Debargha Mukherjee <debargha@google.com>
	Sat, 13 Feb 2016 00:44:33 +0000 (16:44 -0800)
committer	Debargha Mukherjee <debargha@google.com>
	Wed, 17 Feb 2016 17:31:34 +0000 (09:31 -0800)
vp10/common/idct.c		patch \| blob \| history
vp10/common/vp10_rtcd_defs.pl		patch \| blob \| history
vp10/encoder/dct.c		patch \| blob \| history
vpx_dsp/inv_txfm.c		patch \| blob \| history
vpx_dsp/inv_txfm.h		patch \| blob \| history
vpx_dsp/txfm_common.h		patch \| blob \| history