From 9b88762b178f9f335d434090a398f2e5a6f2182d Mon Sep 17 00:00:00 2001 From: Debargha Mukherjee Date: Mon, 14 Mar 2016 22:30:09 -0700 Subject: [PATCH] Refactor 1D transforms In preparation for adding more 1D variants with ADST/FlipADST/etc. BDRATE actually improves by 0.21% on lowres. Change-Id: I2fa4720c69fe001fa666119a284dfc6b17fffab2 --- vp10/common/idct.c | 298 ++++++++++++++------------------- vp10/encoder/dct.c | 121 +++++-------- vp10/encoder/hybrid_fwd_txfm.c | 25 ++- 3 files changed, 183 insertions(+), 261 deletions(-) diff --git a/vp10/common/idct.c b/vp10/common/idct.c index f621ec61b..863f0db6b 100644 --- a/vp10/common/idct.c +++ b/vp10/common/idct.c @@ -260,6 +260,30 @@ void idst16_c(const tran_low_t *input, tran_low_t *output) { } #if CONFIG_EXT_TX +static void iidtx4_c(const tran_low_t *input, tran_low_t *output) { + int i; + for (i = 0; i < 4; ++i) + output[i] = (tran_low_t)dct_const_round_shift(input[i] * Sqrt2); +} + +static void iidtx8_c(const tran_low_t *input, tran_low_t *output) { + int i; + for (i = 0; i < 8; ++i) + output[i] = input[i] * 2; +} + +static void iidtx16_c(const tran_low_t *input, tran_low_t *output) { + int i; + for (i = 0; i < 16; ++i) + output[i] = (tran_low_t)dct_const_round_shift(input[i] * 2 * Sqrt2); +} + +static void iidtx32_c(const tran_low_t *input, tran_low_t *output) { + int i; + for (i = 0; i < 32; ++i) + output[i] = input[i] * 4; +} + // For use in lieu of DST static void ihalfcenter32_c(const tran_low_t *input, tran_low_t *output) { int i; @@ -291,6 +315,37 @@ static void ihalfright32_c(const tran_low_t *input, tran_low_t *output) { } #if CONFIG_VP9_HIGHBITDEPTH +static void highbd_iidtx4_c(const tran_low_t *input, tran_low_t *output, + int bd) { + int i; + for (i = 0; i < 4; ++i) + output[i] = (tran_low_t)highbd_dct_const_round_shift(input[i] * Sqrt2, bd); +} + +static void highbd_iidtx8_c(const tran_low_t *input, tran_low_t *output, + int bd) { + int i; + (void) bd; + for (i = 0; i < 8; ++i) + output[i] = input[i] * 2; +} + +static void highbd_iidtx16_c(const tran_low_t *input, tran_low_t *output, + int bd) { + int i; + for (i = 0; i < 16; ++i) + output[i] = (tran_low_t)highbd_dct_const_round_shift( + input[i] * 2 * Sqrt2, bd); +} + +static void highbd_iidtx32_c(const tran_low_t *input, tran_low_t *output, + int bd) { + int i; + (void) bd; + for (i = 0; i < 32; ++i) + output[i] = input[i] * 4; +} + static void highbd_ihalfcenter32_c(const tran_low_t *input, tran_low_t *output, int bd) { int i; @@ -331,85 +386,19 @@ static void inv_idtx_add_c(const tran_low_t *input, uint8_t *dest, int stride, int bs, int tx_type) { int r, c; const int shift = bs < 32 ? 3 : 2; - - tran_low_t temp_in[32], temp_out[32]; - transform_2d ht = {idct4_c, idct4_c}; - int out_scale = 1; - int coeff_stride = 0; - - switch (bs) { - case 4: - ht.cols = idct4_c; - ht.rows = idct4_c; - out_scale = cospi_16_64 >> 3; - coeff_stride = 4; - break; - case 8: - ht.cols = idct8_c; - ht.rows = idct8_c; - out_scale = (1 << (DCT_CONST_BITS - 4)); - coeff_stride = 8; - break; - case 16: - ht.cols = idct16_c; - ht.rows = idct16_c; - out_scale = cospi_16_64 >> 4; - coeff_stride = 16; - break; - case 32: - ht.cols = idct32_c; - ht.rows = idct32_c; - out_scale = (1 << (DCT_CONST_BITS - 4)); - coeff_stride = 32; - break; - default: - assert(0); - } - - // Columns - if (tx_type == V_DCT) { - for (c = 0; c < bs; ++c) { - for (r = 0; r < bs; ++r) - temp_in[r] = input[r * coeff_stride + c]; - ht.cols(temp_in, temp_out); - - for (r = 0; r < bs; ++r) { - tran_high_t temp = (tran_high_t)temp_out[r] * out_scale; - temp >>= DCT_CONST_BITS; - dest[r * stride + c] = clip_pixel_add(dest[r * stride + c], - (tran_low_t)temp); - } - } - return; - } - - if (tx_type == H_DCT) { + if (tx_type == IDTX) { for (r = 0; r < bs; ++r) { for (c = 0; c < bs; ++c) - temp_in[c] = input[r * coeff_stride + c]; - ht.rows(temp_in, temp_out); - - for (c = 0; c < bs; ++c) { - tran_high_t temp = (tran_high_t)temp_out[c] * out_scale; - temp >>= DCT_CONST_BITS; - dest[r * stride + c] = clip_pixel_add(dest[r * stride + c], - (tran_low_t)temp); - } + dest[c] = clip_pixel_add(dest[c], input[c] >> shift); + dest += stride; + input += bs; } - return; - } - - for (r = 0; r < bs; ++r) { - for (c = 0; c < bs; ++c) - dest[c] = clip_pixel_add(dest[c], input[c] >> shift); - dest += stride; - input += bs; } } #define FLIPUD_PTR(dest, stride, size) do { \ - (dest) = (dest) + ((size) - 1) * (stride); \ - (stride) = - (stride); \ + (dest) = (dest) + ((size) - 1) * (stride); \ + (stride) = - (stride); \ } while (0) static void maybe_flip_strides(uint8_t **dst, int *dstride, @@ -428,6 +417,7 @@ static void maybe_flip_strides(uint8_t **dst, int *dstride, case DST_DCT: case DST_ADST: case ADST_DST: + case IDTX: case V_DCT: case H_DCT: break; @@ -705,78 +695,13 @@ static void highbd_inv_idtx_add_c(const tran_low_t *input, uint8_t *dest8, const int shift = bs < 32 ? 3 : 2; uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); - tran_low_t temp_in[32], temp_out[32]; - highbd_transform_2d ht = {vpx_highbd_idct4_c, vpx_highbd_idct4_c}; - int out_scale = 1; - int coeff_stride = 0; - - switch (bs) { - case 4: - ht.cols = vpx_highbd_idct4_c; - ht.rows = vpx_highbd_idct4_c; - out_scale = cospi_16_64 >> 3; - coeff_stride = 4; - break; - case 8: - ht.cols = vpx_highbd_idct8_c; - ht.rows = vpx_highbd_idct8_c; - out_scale = (1 << (DCT_CONST_BITS - 4)); - coeff_stride = 8; - break; - case 16: - ht.cols = vpx_highbd_idct16_c; - ht.rows = vpx_highbd_idct16_c; - out_scale = cospi_16_64 >> 4; - coeff_stride = 16; - break; - case 32: - ht.cols = vpx_highbd_idct32_c; - ht.rows = vpx_highbd_idct32_c; - out_scale = (1 << (DCT_CONST_BITS - 4)); - coeff_stride = 32; - break; - default: - assert(0); - } - - // Columns - if (tx_type == V_DCT) { - for (c = 0; c < bs; ++c) { - for (r = 0; r < bs; ++r) - temp_in[r] = input[r * coeff_stride + c]; - ht.cols(temp_in, temp_out, bd); - - for (r = 0; r < bs; ++r) { - tran_high_t temp = (tran_high_t)temp_out[r] * out_scale; - temp >>= DCT_CONST_BITS; - dest[r * stride + c] = highbd_clip_pixel_add(dest[r * stride + c], - (tran_low_t)temp, bd); - } - } - return; - } - - if (tx_type == H_DCT) { + if (tx_type == IDTX) { for (r = 0; r < bs; ++r) { for (c = 0; c < bs; ++c) - temp_in[c] = input[r * coeff_stride + c]; - ht.rows(temp_in, temp_out, bd); - - for (c = 0; c < bs; ++c) { - tran_high_t temp = (tran_high_t)temp_out[c] * out_scale; - temp >>= DCT_CONST_BITS; - dest[r * stride + c] = highbd_clip_pixel_add(dest[r * stride + c], - (tran_low_t)temp, bd); - } + dest[c] = highbd_clip_pixel_add(dest[c], input[c] >> shift, bd); + dest += stride; + input += bs; } - return; - } - - for (r = 0; r < bs; ++r) { - for (c = 0; c < bs; ++c) - dest[c] = highbd_clip_pixel_add(dest[c], input[c] >> shift, bd); - dest += stride; - input += bs; } } @@ -796,6 +721,9 @@ static void maybe_flip_strides16(uint16_t **dst, int *dstride, case DST_DCT: case DST_ADST: case ADST_DST: + case IDTX: + case V_DCT: + case H_DCT: break; case FLIPADST_DCT: case FLIPADST_ADST: @@ -843,6 +771,9 @@ void vp10_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride, { idst4_c, iadst4_c }, // DST_FLIPADST = 13, { iadst4_c, idst4_c }, // FLIPADST_DST = 14, { idst4_c, idst4_c }, // DST_DST = 15 + { iidtx4_c, iidtx4_c }, // IDTX = 16 + { idct4_c, iidtx4_c }, // V_DCT = 17 + { iidtx4_c, idct4_c }, // H_DCT = 18 #endif // CONFIG_EXT_TX }; @@ -906,6 +837,9 @@ void vp10_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride, { idst8_c, iadst8_c }, // DST_FLIPADST = 13, { iadst8_c, idst8_c }, // FLIPADST_DST = 14, { idst8_c, idst8_c }, // DST_DST = 15 + { iidtx8_c, iidtx8_c }, // IDTX = 16 + { idct8_c, iidtx8_c }, // V_DCT = 17 + { iidtx8_c, idct8_c }, // H_DCT = 18 #endif // CONFIG_EXT_TX }; @@ -969,6 +903,9 @@ void vp10_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride, { idst16_c, iadst16_c }, // DST_FLIPADST = 13, { iadst16_c, idst16_c }, // FLIPADST_DST = 14, { idst16_c, idst16_c }, // DST_DST = 15 + { iidtx16_c, iidtx16_c }, // IDTX = 16 + { idct16_c, iidtx16_c }, // V_DCT = 17 + { iidtx16_c, idct16_c }, // H_DCT = 18 #endif // CONFIG_EXT_TX }; @@ -1032,6 +969,9 @@ void vp10_iht32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, { ihalfcenter32_c, ihalfright32_c }, // DST_FLIPADST = 13, { ihalfright32_c, ihalfcenter32_c }, // FLIPADST_DST = 14, { ihalfcenter32_c, ihalfcenter32_c }, // DST_DST = 15 + { iidtx32_c, iidtx32_c }, // IDTX = 16 + { idct32_c, iidtx32_c }, // V_DCT = 17 + { iidtx32_c, idct32_c }, // H_DCT = 18 }; int i, j; @@ -1165,11 +1105,11 @@ void vp10_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest, case ADST_DST: case FLIPADST_DST: case DST_FLIPADST: + case H_DCT: + case V_DCT: // Use C version since DST only exists in C code vp10_iht4x4_16_add_c(input, dest, stride, tx_type); break; - case H_DCT: - case V_DCT: case IDTX: inv_idtx_add_c(input, dest, stride, 4, tx_type); break; @@ -1206,11 +1146,11 @@ void vp10_inv_txfm_add_8x8(const tran_low_t *input, uint8_t *dest, case ADST_DST: case FLIPADST_DST: case DST_FLIPADST: + case H_DCT: + case V_DCT: // Use C version since DST only exists in C code vp10_iht8x8_64_add_c(input, dest, stride, tx_type); break; - case H_DCT: - case V_DCT: case IDTX: inv_idtx_add_c(input, dest, stride, 8, tx_type); break; @@ -1247,11 +1187,11 @@ void vp10_inv_txfm_add_16x16(const tran_low_t *input, uint8_t *dest, case ADST_DST: case FLIPADST_DST: case DST_FLIPADST: + case H_DCT: + case V_DCT: // Use C version since DST only exists in C code vp10_iht16x16_256_add_c(input, dest, stride, tx_type); break; - case H_DCT: - case V_DCT: case IDTX: inv_idtx_add_c(input, dest, stride, 16, tx_type); break; @@ -1284,10 +1224,10 @@ void vp10_inv_txfm_add_32x32(const tran_low_t *input, uint8_t *dest, case ADST_DST: case FLIPADST_DST: case DST_FLIPADST: - vp10_iht32x32_1024_add_c(input, dest, stride, tx_type); - break; case H_DCT: case V_DCT: + vp10_iht32x32_1024_add_c(input, dest, stride, tx_type); + break; case IDTX: inv_idtx_add_c(input, dest, stride, 32, tx_type); break; @@ -1319,6 +1259,9 @@ void vp10_highbd_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, { highbd_idst4_c, vpx_highbd_iadst4_c }, // DST_FLIPADST = 13, { vpx_highbd_iadst4_c, highbd_idst4_c }, // FLIPADST_DST = 14, { highbd_idst4_c, highbd_idst4_c }, // DST_DST = 15 + { highbd_iidtx4_c, highbd_iidtx4_c }, // IDTX = 16 + { vpx_highbd_idct4_c, highbd_iidtx4_c }, // V_DCT = 17 + { highbd_iidtx4_c, vpx_highbd_idct4_c }, // H_DCT = 18 #endif // CONFIG_EXT_TX }; @@ -1385,6 +1328,9 @@ void vp10_highbd_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest8, { highbd_idst8_c, vpx_highbd_iadst8_c }, // DST_FLIPADST = 13, { vpx_highbd_iadst8_c, highbd_idst8_c }, // FLIPADST_DST = 14, { highbd_idst8_c, highbd_idst8_c }, // DST_DST = 15 + { highbd_iidtx8_c, highbd_iidtx8_c }, // IDTX = 16 + { vpx_highbd_idct8_c, highbd_iidtx8_c }, // V_DCT = 17 + { highbd_iidtx8_c, vpx_highbd_idct8_c }, // H_DCT = 18 #endif // CONFIG_EXT_TX }; @@ -1451,6 +1397,9 @@ void vp10_highbd_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest8, { highbd_idst16_c, vpx_highbd_iadst16_c }, // DST_FLIPADST = 13, { vpx_highbd_iadst16_c, highbd_idst16_c }, // FLIPADST_DST = 14, { highbd_idst16_c, highbd_idst16_c }, // DST_DST = 15 + { highbd_iidtx16_c, highbd_iidtx16_c }, // IDTX = 16 + { vpx_highbd_idct16_c, highbd_iidtx16_c }, // V_DCT = 17 + { highbd_iidtx16_c, vpx_highbd_idct16_c }, // H_DCT = 18 #endif // CONFIG_EXT_TX }; @@ -1501,22 +1450,25 @@ void vp10_highbd_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest8, void vp10_highbd_iht32x32_1024_add_c(const tran_low_t *input, uint8_t *dest8, int stride, int tx_type, int bd) { static const highbd_transform_2d HIGH_IHT_32[] = { - { vpx_highbd_idct32_c, vpx_highbd_idct32_c }, // DCT_DCT - { highbd_ihalfright32_c, vpx_highbd_idct32_c }, // ADST_DCT - { vpx_highbd_idct32_c, highbd_ihalfright32_c }, // DCT_ADST - { highbd_ihalfright32_c, highbd_ihalfright32_c }, // ADST_ADST - { highbd_ihalfright32_c, vpx_highbd_idct32_c }, // FLIPADST_DCT - { vpx_highbd_idct32_c, highbd_ihalfright32_c }, // DCT_FLIPADST - { highbd_ihalfright32_c, highbd_ihalfright32_c }, // FLIPADST_FLIPADST - { highbd_ihalfright32_c, highbd_ihalfright32_c }, // ADST_FLIPADST - { highbd_ihalfright32_c, highbd_ihalfright32_c }, // FLIPADST_ADST - { highbd_ihalfcenter32_c, vpx_highbd_idct32_c }, // DST_DCT - { vpx_highbd_idct32_c, highbd_ihalfcenter32_c }, // DCT_DST - { highbd_ihalfcenter32_c, highbd_ihalfright32_c }, // DST_ADST - { highbd_ihalfright32_c, highbd_ihalfcenter32_c }, // ADST_DST - { highbd_ihalfcenter32_c, highbd_ihalfright32_c }, // DST_FLIPADST - { highbd_ihalfright32_c, highbd_ihalfcenter32_c }, // FLIPADST_DST - { highbd_ihalfcenter32_c, highbd_ihalfcenter32_c }, // DST_DST + { vpx_highbd_idct32_c, vpx_highbd_idct32_c }, // DCT_DCT + { highbd_ihalfright32_c, vpx_highbd_idct32_c }, // ADST_DCT + { vpx_highbd_idct32_c, highbd_ihalfright32_c }, // DCT_ADST + { highbd_ihalfright32_c, highbd_ihalfright32_c }, // ADST_ADST + { highbd_ihalfright32_c, vpx_highbd_idct32_c }, // FLIPADST_DCT + { vpx_highbd_idct32_c, highbd_ihalfright32_c }, // DCT_FLIPADST + { highbd_ihalfright32_c, highbd_ihalfright32_c }, // FLIPADST_FLIPADST + { highbd_ihalfright32_c, highbd_ihalfright32_c }, // ADST_FLIPADST + { highbd_ihalfright32_c, highbd_ihalfright32_c }, // FLIPADST_ADST + { highbd_ihalfcenter32_c, vpx_highbd_idct32_c }, // DST_DCT + { vpx_highbd_idct32_c, highbd_ihalfcenter32_c }, // DCT_DST + { highbd_ihalfcenter32_c, highbd_ihalfright32_c }, // DST_ADST + { highbd_ihalfright32_c, highbd_ihalfcenter32_c }, // ADST_DST + { highbd_ihalfcenter32_c, highbd_ihalfright32_c }, // DST_FLIPADST + { highbd_ihalfright32_c, highbd_ihalfcenter32_c }, // FLIPADST_DST + { highbd_ihalfcenter32_c, highbd_ihalfcenter32_c }, // DST_DST + { highbd_iidtx32_c, highbd_iidtx32_c }, // IDTX + { vpx_highbd_idct32_c, highbd_iidtx32_c }, // V_DCT + { highbd_iidtx32_c, vpx_highbd_idct32_c }, // H_DCT }; uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); @@ -1657,11 +1609,11 @@ void vp10_highbd_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest, case ADST_DST: case FLIPADST_DST: case DST_FLIPADST: + case H_DCT: + case V_DCT: // Use C version since DST only exists in C code vp10_highbd_iht4x4_16_add_c(input, dest, stride, tx_type, bd); break; - case H_DCT: - case V_DCT: case IDTX: highbd_inv_idtx_add_c(input, dest, stride, 4, tx_type, bd); break; @@ -1699,11 +1651,11 @@ void vp10_highbd_inv_txfm_add_8x8(const tran_low_t *input, uint8_t *dest, case ADST_DST: case FLIPADST_DST: case DST_FLIPADST: + case H_DCT: + case V_DCT: // Use C version since DST only exists in C code vp10_highbd_iht8x8_64_add_c(input, dest, stride, tx_type, bd); break; - case H_DCT: - case V_DCT: case IDTX: highbd_inv_idtx_add_c(input, dest, stride, 8, tx_type, bd); break; @@ -1741,11 +1693,11 @@ void vp10_highbd_inv_txfm_add_16x16(const tran_low_t *input, uint8_t *dest, case ADST_DST: case FLIPADST_DST: case DST_FLIPADST: + case H_DCT: + case V_DCT: // Use C version since DST only exists in C code vp10_highbd_iht16x16_256_add_c(input, dest, stride, tx_type, bd); break; - case H_DCT: - case V_DCT: case IDTX: highbd_inv_idtx_add_c(input, dest, stride, 16, tx_type, bd); break; @@ -1779,10 +1731,10 @@ void vp10_highbd_inv_txfm_add_32x32(const tran_low_t *input, uint8_t *dest, case ADST_DST: case FLIPADST_DST: case DST_FLIPADST: - vp10_highbd_iht32x32_1024_add_c(input, dest, stride, tx_type, bd); - break; case H_DCT: case V_DCT: + vp10_highbd_iht32x32_1024_add_c(input, dest, stride, tx_type, bd); + break; case IDTX: highbd_inv_idtx_add_c(input, dest, stride, 32, tx_type, bd); break; diff --git a/vp10/encoder/dct.c b/vp10/encoder/dct.c index 31a4c87c2..8a1ee201c 100644 --- a/vp10/encoder/dct.c +++ b/vp10/encoder/dct.c @@ -1212,6 +1212,30 @@ static void fadst16(const tran_low_t *input, tran_low_t *output) { } #if CONFIG_EXT_TX +static void fidtx4(const tran_low_t *input, tran_low_t *output) { + int i; + for (i = 0; i < 4; ++i) + output[i] = (tran_low_t)fdct_round_shift(input[i] * Sqrt2); +} + +static void fidtx8(const tran_low_t *input, tran_low_t *output) { + int i; + for (i = 0; i < 8; ++i) + output[i] = input[i] * 2; +} + +static void fidtx16(const tran_low_t *input, tran_low_t *output) { + int i; + for (i = 0; i < 16; ++i) + output[i] = (tran_low_t)fdct_round_shift(input[i] * 2 * Sqrt2); +} + +static void fidtx32(const tran_low_t *input, tran_low_t *output) { + int i; + for (i = 0; i < 32; ++i) + output[i] = input[i] * 4; +} + // For use in lieu of DST static void fhalfcenter32(const tran_low_t *input, tran_low_t *output) { int i; @@ -1315,6 +1339,7 @@ static void maybe_flip_input(const int16_t **src, int *src_stride, int l, case DST_DCT: case DST_ADST: case ADST_DST: + case IDTX: case H_DCT: case V_DCT: break; @@ -1362,6 +1387,9 @@ static const transform_2d FHT_4[] = { { fdst4, fadst4 }, // DST_FLIPADST = 13, { fadst4, fdst4 }, // FLIPADST_DST = 14, { fdst4, fdst4 }, // DST_DST = 15 + { fidtx4, fidtx4 }, // IDTX = 16 + { fdct4, fidtx4 }, // V_DCT = 17 + { fidtx4, fdct4 }, // H_DCT = 18 #endif // CONFIG_EXT_TX }; @@ -1383,6 +1411,9 @@ static const transform_2d FHT_8[] = { { fdst8, fadst8 }, // DST_FLIPADST = 13, { fadst8, fdst8 }, // FLIPADST_DST = 14, { fdst8, fdst8 }, // DST_DST = 15 + { fidtx8, fidtx8 }, // IDTX = 16 + { fdct8, fidtx8 }, // V_DCT = 17 + { fidtx8, fdct8 }, // H_DCT = 18 #endif // CONFIG_EXT_TX }; @@ -1404,6 +1435,9 @@ static const transform_2d FHT_16[] = { { fdst16, fadst16 }, // DST_FLIPADST = 13, { fadst16, fdst16 }, // FLIPADST_DST = 14, { fdst16, fdst16 }, // DST_DST = 15 + { fidtx16, fidtx16 }, // IDTX = 16 + { fdct16, fidtx16 }, // V_DCT = 17 + { fidtx16, fdct16 }, // H_DCT = 18 #endif // CONFIG_EXT_TX }; @@ -1425,6 +1459,9 @@ static const transform_2d FHT_32[] = { { fhalfcenter32, fhalfright32 }, // DST_FLIPADST = 13, { fhalfright32, fhalfcenter32 }, // FLIPADST_DST = 14, { fhalfcenter32, fhalfcenter32 }, // DST_DST = 15 + { fidtx32, fidtx32 }, // IDTX = 16 + { fdct32, fidtx32 }, // V_DCT = 17 + { fidtx32, fdct32 }, // H_DCT = 18 }; #endif // CONFIG_EXT_TX @@ -1766,86 +1803,12 @@ void vp10_fwd_idtx_c(const int16_t *src_diff, int bs, int tx_type) { int r, c; const int shift = bs < 32 ? 3 : 2; - - const int16_t *input = src_diff; - tran_low_t *output = coeff; - - int i, j; - tran_low_t temp_in[32], temp_out[32]; - transform_2d ht = {fdct4, fdct4}; - int in_scale = 1; - int out_scale = 1; - int coeff_stride = 0; - - switch (bs) { - case 4: - ht.cols = fdct4; - ht.rows = fdct4; - in_scale = 16; - out_scale = cospi_16_64 >> 1; - coeff_stride = 4; - break; - case 8: - ht.cols = fdct8; - ht.rows = fdct8; - in_scale = 4; - out_scale = (1 << DCT_CONST_BITS); - coeff_stride = 8; - break; - case 16: - ht.cols = fdct16; - ht.rows = fdct16; - in_scale = 4; - out_scale = cospi_16_64; - coeff_stride = 16; - break; - case 32: - ht.cols = fdct32; - ht.rows = fdct32; - in_scale = 4; - out_scale = (1 << (DCT_CONST_BITS - 2)); - coeff_stride = 32; - break; - default: - assert(0); - } - - // Columns - if (tx_type == V_DCT) { - for (i = 0; i < bs; ++i) { - for (j = 0; j < bs; ++j) - temp_in[j] = input[j * stride + i] * in_scale; - ht.cols(temp_in, temp_out); - - for (j = 0; j < bs; ++j) { - tran_high_t temp = (tran_high_t)temp_out[j] * out_scale; - temp >>= DCT_CONST_BITS; - output[j * coeff_stride + i] = (tran_low_t)temp; - } + if (tx_type == IDTX) { + for (r = 0; r < bs; ++r) { + for (c = 0; c < bs; ++c) coeff[c] = src_diff[c] << shift; + src_diff += stride; + coeff += bs; } - return; - } - - // Rows - if (tx_type == H_DCT) { - for (j = 0; j < bs; ++j) { - for (i = 0; i < bs; ++i) - temp_in[i] = input[j * stride + i] * in_scale; - ht.rows(temp_in, temp_out); - - for (i = 0; i < bs; ++i) { - tran_high_t temp = (tran_high_t)temp_out[i] * out_scale; - temp >>= DCT_CONST_BITS; - output[j * coeff_stride + i] = (tran_low_t)temp; - } - } - return; - } - - for (r = 0; r < bs; ++r) { - for (c = 0; c < bs; ++c) coeff[c] = src_diff[c] << shift; - src_diff += stride; - coeff += bs; } } diff --git a/vp10/encoder/hybrid_fwd_txfm.c b/vp10/encoder/hybrid_fwd_txfm.c index c3a739b7d..faedb4349 100644 --- a/vp10/encoder/hybrid_fwd_txfm.c +++ b/vp10/encoder/hybrid_fwd_txfm.c @@ -65,6 +65,8 @@ void vp10_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff, break; case H_DCT: case V_DCT: + vp10_fht4x4_c(src_diff, coeff, diff_stride, tx_type); + break; case IDTX: vp10_fwd_idtx_c(src_diff, coeff, diff_stride, 4, tx_type); break; @@ -105,6 +107,8 @@ static void fwd_txfm_8x8(const int16_t *src_diff, tran_low_t *coeff, break; case H_DCT: case V_DCT: + vp10_fht8x8_c(src_diff, coeff, diff_stride, tx_type); + break; case IDTX: vp10_fwd_idtx_c(src_diff, coeff, diff_stride, 8, tx_type); break; @@ -145,6 +149,8 @@ static void fwd_txfm_16x16(const int16_t *src_diff, tran_low_t *coeff, break; case H_DCT: case V_DCT: + vp10_fht16x16_c(src_diff, coeff, diff_stride, tx_type); + break; case IDTX: vp10_fwd_idtx_c(src_diff, coeff, diff_stride, 16, tx_type); break; @@ -185,6 +191,8 @@ static void fwd_txfm_32x32(int rd_transform, const int16_t *src_diff, break; case H_DCT: case V_DCT: + vp10_fht32x32_c(src_diff, coeff, diff_stride, tx_type); + break; case IDTX: vp10_fwd_idtx_c(src_diff, coeff, diff_stride, 32, tx_type); break; @@ -226,11 +234,10 @@ void vp10_highbd_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff, case ADST_DST: case DST_FLIPADST: case FLIPADST_DST: - // Use C version since DST exists only in C - vp10_highbd_fht4x4_c(src_diff, coeff, diff_stride, tx_type); - break; case H_DCT: case V_DCT: + vp10_highbd_fht4x4_c(src_diff, coeff, diff_stride, tx_type); + break; case IDTX: vp10_fwd_idtx_c(src_diff, coeff, diff_stride, 4, tx_type); break; @@ -270,11 +277,11 @@ static void highbd_fwd_txfm_8x8(const int16_t *src_diff, tran_low_t *coeff, case ADST_DST: case DST_FLIPADST: case FLIPADST_DST: + case H_DCT: + case V_DCT: // Use C version since DST exists only in C vp10_highbd_fht8x8_c(src_diff, coeff, diff_stride, tx_type); break; - case H_DCT: - case V_DCT: case IDTX: vp10_fwd_idtx_c(src_diff, coeff, diff_stride, 8, tx_type); break; @@ -314,11 +321,11 @@ static void highbd_fwd_txfm_16x16(const int16_t *src_diff, tran_low_t *coeff, case ADST_DST: case DST_FLIPADST: case FLIPADST_DST: + case H_DCT: + case V_DCT: // Use C version since DST exists only in C vp10_highbd_fht16x16_c(src_diff, coeff, diff_stride, tx_type); break; - case H_DCT: - case V_DCT: case IDTX: vp10_fwd_idtx_c(src_diff, coeff, diff_stride, 16, tx_type); break; @@ -355,10 +362,10 @@ static void highbd_fwd_txfm_32x32(int rd_transform, const int16_t *src_diff, case ADST_DST: case DST_FLIPADST: case FLIPADST_DST: - vp10_highbd_fht32x32_c(src_diff, coeff, diff_stride, tx_type); - break; case H_DCT: case V_DCT: + vp10_highbd_fht32x32_c(src_diff, coeff, diff_stride, tx_type); + break; case IDTX: vp10_fwd_idtx_c(src_diff, coeff, diff_stride, 32, tx_type); break; -- 2.40.0