From: Debargha Mukherjee Date: Thu, 10 Sep 2015 00:50:20 +0000 (-0700) Subject: Comprehensive support for symmetric DST X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=4ce81d666ebef780c47906da9ca86070c9e94874;p=libvpx Comprehensive support for symmetric DST Creates new hybrid transforms combining symmetric DST with ADST and DCT. Thus a total of 16 transforms are supported. derfl: +1.659% (up about 0.2%) Change-Id: Idde1cecdb59527890bf05da740099c3f6a5b9764 --- diff --git a/vp10/common/blockd.h b/vp10/common/blockd.h index 70983ca72..597cf125c 100644 --- a/vp10/common/blockd.h +++ b/vp10/common/blockd.h @@ -239,6 +239,13 @@ static TX_TYPE ext_tx_to_txtype[EXT_TX_TYPES] = { DCT_ADST, FLIPADST_DCT, DCT_FLIPADST, + DST_DST, + DST_DCT, + DCT_DST, + DST_ADST, + ADST_DST, + DST_FLIPADST, + FLIPADST_DST, }; #endif // CONFIG_EXT_TX diff --git a/vp10/common/entropymode.c b/vp10/common/entropymode.c index b9d5af9c7..c6384231e 100644 --- a/vp10/common/entropymode.c +++ b/vp10/common/entropymode.c @@ -317,19 +317,26 @@ static const vpx_prob default_switchable_interp_prob[SWITCHABLE_FILTER_CONTEXTS] #if CONFIG_EXT_TX const vpx_tree_index vp10_ext_tx_tree[TREE_SIZE(EXT_TX_TYPES)] = { -NORM, 2, - 4, 10, - 6, 8, - -ALT1, -ALT2, - -ALT3, -ALT4, + -ALT9, 4, + 6, 16, + 8, 10, + -ALT10, -ALT11, 12, 14, - -ALT5, -ALT6, - -ALT7, -ALT8, + -ALT1, -ALT2, + -ALT4, -ALT5, + 18, 24, + 20, 22, + -ALT12, -ALT13, + -ALT14, -ALT15, + 26, 28, + -ALT3, -ALT6, + -ALT7, -ALT8 }; static const vpx_prob default_ext_tx_prob[EXT_TX_SIZES][EXT_TX_TYPES - 1] = { - { 240, 128, 128, 128, 128, 128, 128, 128 }, - { 208, 128, 128, 128, 128, 128, 128, 128 }, - { 176, 128, 128, 128, 128, 128, 128, 128 }, + { 216, 20, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 192, 20, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 168, 20, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, }; #endif // CONFIG_EXT_TX diff --git a/vp10/common/enums.h b/vp10/common/enums.h index a63b4932a..a4a544089 100644 --- a/vp10/common/enums.h +++ b/vp10/common/enums.h @@ -100,6 +100,13 @@ typedef enum { FLIPADST_FLIPADST = 6, ADST_FLIPADST = 7, FLIPADST_ADST = 8, + DST_DST = 9, + DST_DCT = 10, + DCT_DST = 11, + DST_ADST = 12, + ADST_DST = 13, + DST_FLIPADST = 14, + FLIPADST_DST = 15, #endif // CONFIG_EXT_TX TX_TYPES, } TX_TYPE; @@ -116,6 +123,13 @@ typedef enum { ALT6 = 6, ALT7 = 7, ALT8 = 8, + ALT9 = 9, + ALT10 = 10, + ALT11 = 11, + ALT12 = 12, + ALT13 = 13, + ALT14 = 14, + ALT15 = 15, EXT_TX_TYPES } EXT_TX_TYPE; #endif // CONFIG_EXT_TX diff --git a/vp10/common/idct.c b/vp10/common/idct.c index 6533690ef..9e1f87007 100644 --- a/vp10/common/idct.c +++ b/vp10/common/idct.c @@ -17,45 +17,170 @@ #include "vpx_dsp/inv_txfm.h" #include "vpx_ports/mem.h" -void vp10_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride, - int tx_type) { - const transform_2d IHT_4[] = { - { idct4_c, idct4_c }, // DCT_DCT = 0 - { iadst4_c, idct4_c }, // ADST_DCT = 1 - { idct4_c, iadst4_c }, // DCT_ADST = 2 - { iadst4_c, iadst4_c } // ADST_ADST = 3 +#if CONFIG_EXT_TX +void idst4_c(const tran_low_t *input, tran_low_t *output) { + static const int N = 4; + static const int sinvalue_lookup_table[] = { + 9630, 15582 }; + static const int mult = 14654; // sqrt(4/5) + int i, j; + for (i = 0; i < N; i++) { + int64_t sum = 0; + for (j = 0; j < N; j++) { + int idx = (i + 1) * (j + 1); + int sign = 0; + if (idx > N + 1) { + sign = (idx / (N + 1)) & 1; + idx %= (N + 1); + } + idx = idx > N + 1 - idx ? N + 1 - idx : idx; + if (idx == 0) continue; + idx--; + sum += (int64_t)input[j] * sinvalue_lookup_table[idx] * (sign ? -1 : 1); + } + sum = (sum * mult) >> (2 * DCT_CONST_BITS); + output[i] = WRAPLOW(sum, 8); + } +} +void idst8_c(const tran_low_t *input, tran_low_t *output) { + static const int N = 8; + static const int sinvalue_lookup_table[] = { + 5604, 10531, 14189, 16135 + }; + static const int mult = 15447; // 2*sqrt(2/9) int i, j; - tran_low_t out[4 * 4]; - tran_low_t *outptr = out; - tran_low_t temp_in[4], temp_out[4]; + for (i = 0; i < N; i++) { + int64_t sum = 0; + for (j = 0; j < N; j++) { + int idx = (i + 1) * (j + 1); + int sign = 0; + if (idx > N + 1) { + sign = (idx / (N + 1)) & 1; + idx %= (N + 1); + } + idx = idx > N + 1 - idx ? N + 1 - idx : idx; + if (idx == 0) continue; + idx--; + sum += (int64_t)input[j] * sinvalue_lookup_table[idx] * (sign ? -1 : 1); + } + sum = (sum * mult) >> (2 * DCT_CONST_BITS); + output[i] = WRAPLOW(sum, 8); + } +} - // inverse transform row vectors - for (i = 0; i < 4; ++i) { - IHT_4[tx_type].rows(input, outptr); - input += 4; - outptr += 4; +void idst16_c(const tran_low_t *input, tran_low_t *output) { + static const int N = 16; + static const int sinvalue_lookup_table[] = { + 3011, 5919, 8625, 11038, + 13075, 14666, 15759, 16314 + }; + static const int mult = 15895; // 2*sqrt(4/17) + int i, j; + for (i = 0; i < N; i++) { + int64_t sum = 0; + for (j = 0; j < N; j++) { + int idx = (i + 1) * (j + 1); + int sign = 0; + if (idx > N + 1) { + sign = (idx / (N + 1)) & 1; + idx %= (N + 1); + } + idx = idx > N + 1 - idx ? N + 1 - idx : idx; + if (idx == 0) continue; + idx--; + sum += (int64_t)input[j] * sinvalue_lookup_table[idx] * (sign ? -1 : 1); + } + sum = (sum * mult) >> (2 * DCT_CONST_BITS); + output[i] = WRAPLOW(sum, 8); } +} - // inverse transform column vectors - for (i = 0; i < 4; ++i) { - for (j = 0; j < 4; ++j) - temp_in[j] = out[j * 4 + i]; - IHT_4[tx_type].cols(temp_in, temp_out); - for (j = 0; j < 4; ++j) { - dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], - ROUND_POWER_OF_TWO(temp_out[j], 4)); +#if CONFIG_VP9_HIGHBITDEPTH +void highbd_idst4_c(const tran_low_t *input, tran_low_t *output, int bd) { + static const int N = 4; + static const int sinvalue_lookup_table[] = { + 9630, 15582 + }; + static const int mult = 14654; // sqrt(4/5) + int i, j; + (void) bd; + for (i = 0; i < N; i++) { + int64_t sum = 0; + for (j = 0; j < N; j++) { + int idx = (i + 1) * (j + 1); + int sign = 0; + if (idx > N + 1) { + sign = (idx / (N + 1)) & 1; + idx %= (N + 1); + } + idx = idx > N + 1 - idx ? N + 1 - idx : idx; + if (idx == 0) continue; + idx--; + sum += (int64_t)input[j] * sinvalue_lookup_table[idx] * (sign ? -1 : 1); } + sum = (sum * mult) >> (2 * DCT_CONST_BITS); + output[i] = WRAPLOW(sum, bd); } } -static const transform_2d IHT_8[] = { - { idct8_c, idct8_c }, // DCT_DCT = 0 - { iadst8_c, idct8_c }, // ADST_DCT = 1 - { idct8_c, iadst8_c }, // DCT_ADST = 2 - { iadst8_c, iadst8_c } // ADST_ADST = 3 -}; +void highbd_idst8_c(const tran_low_t *input, tran_low_t *output, int bd) { + static const int N = 8; + static const int sinvalue_lookup_table[] = { + 5604, 10531, 14189, 16135 + }; + static const int mult = 15447; // 2*sqrt(2/9) + int i, j; + (void) bd; + for (i = 0; i < N; i++) { + int64_t sum = 0; + for (j = 0; j < N; j++) { + int idx = (i + 1) * (j + 1); + int sign = 0; + if (idx > N + 1) { + sign = (idx / (N + 1)) & 1; + idx %= (N + 1); + } + idx = idx > N + 1 - idx ? N + 1 - idx : idx; + if (idx == 0) continue; + idx--; + sum += (int64_t)input[j] * sinvalue_lookup_table[idx] * (sign ? -1 : 1); + } + sum = (sum * mult) >> (2 * DCT_CONST_BITS); + output[i] = WRAPLOW(sum, bd); + } +} + +void highbd_idst16_c(const tran_low_t *input, tran_low_t *output, int bd) { + static const int N = 16; + static const int sinvalue_lookup_table[] = { + 3011, 5919, 8625, 11038, + 13075, 14666, 15759, 16314 + }; + static const int mult = 15895; // 2*sqrt(4/17) + int i, j; + (void) bd; + for (i = 0; i < N; i++) { + int64_t sum = 0; + for (j = 0; j < N; j++) { + int idx = (i + 1) * (j + 1); + int sign = 0; + if (idx > N + 1) { + sign = (idx / (N + 1)) & 1; + idx %= (N + 1); + } + idx = idx > N + 1 - idx ? N + 1 - idx : idx; + if (idx == 0) continue; + idx--; + sum += (int64_t)input[j] * sinvalue_lookup_table[idx] * (sign ? -1 : 1); + } + sum = (sum * mult) >> (2 * DCT_CONST_BITS); + output[i] = WRAPLOW(sum, bd); + } +} +#endif // CONFIG_VP9_HIGHBITDEPTH +#endif // CONFIG_EXT_TX #if CONFIG_EXT_TX void fliplr(uint8_t *dest, int stride, int l) { @@ -125,8 +250,76 @@ void fliplrud16(uint16_t *dest, int stride, int l) { } #endif // CONFIG_EXT_TX +void vp10_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride, + int tx_type) { + const transform_2d IHT_4[] = { + { idct4_c, idct4_c }, // DCT_DCT = 0 + { iadst4_c, idct4_c }, // ADST_DCT = 1 + { idct4_c, iadst4_c }, // DCT_ADST = 2 + { iadst4_c, iadst4_c }, // ADST_ADST = 3 +#if CONFIG_EXT_TX + { iadst4_c, idct4_c }, // FLIPADST_DCT = 4 + { idct4_c, iadst4_c }, // DCT_FLIPADST = 5 + { iadst4_c, iadst4_c }, // FLIPADST_FLIPADST = 6 + { iadst4_c, iadst4_c }, // ADST_FLIPADST = 7 + { iadst4_c, iadst4_c }, // FLIPADST_ADST = 8 + { idst4_c, idst4_c }, // DST_DST = 9 + { idst4_c, idct4_c }, // DST_DCT = 10 + { idct4_c, idst4_c }, // DCT_DST = 11 + { idst4_c, iadst4_c }, // DST_ADST = 12 + { iadst4_c, idst4_c }, // ADST_DST = 13 + { idst4_c, iadst4_c }, // DST_FLIPADST = 14 + { iadst4_c, idst4_c }, // FLIPADST_DST = 15 +#endif // CONFIG_EXT_TX + }; + + int i, j; + tran_low_t out[4 * 4]; + tran_low_t *outptr = out; + tran_low_t temp_in[4], temp_out[4]; + + // inverse transform row vectors + for (i = 0; i < 4; ++i) { + IHT_4[tx_type].rows(input, outptr); + input += 4; + outptr += 4; + } + + // inverse transform column vectors + for (i = 0; i < 4; ++i) { + for (j = 0; j < 4; ++j) + temp_in[j] = out[j * 4 + i]; + IHT_4[tx_type].cols(temp_in, temp_out); + for (j = 0; j < 4; ++j) { + dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], + ROUND_POWER_OF_TWO(temp_out[j], 4)); + } + } +} + void vp10_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride, int tx_type) { + static const transform_2d IHT_8[] = { + { idct8_c, idct8_c }, // DCT_DCT = 0 + { iadst8_c, idct8_c }, // ADST_DCT = 1 + { idct8_c, iadst8_c }, // DCT_ADST = 2 + { iadst8_c, iadst8_c }, // ADST_ADST = 3 +#if CONFIG_EXT_TX + { iadst8_c, idct8_c }, // FLIPADST_DCT = 4 + { idct8_c, iadst8_c }, // DCT_FLIPADST = 5 + { iadst8_c, iadst8_c }, // FLIPADST_FLIPADST = 6 + { iadst8_c, iadst8_c }, // ADST_FLIPADST = 7 + { iadst8_c, iadst8_c }, // FLIPADST_ADST = 8 + { idst8_c, idst8_c }, // DST_DST = 9 + { idst8_c, idct8_c }, // DST_DCT = 10 + { idct8_c, idst8_c }, // DCT_DST = 11 + { idst8_c, iadst8_c }, // DST_ADST = 12 + { iadst8_c, idst8_c }, // ADST_DST = 13 + { idst8_c, iadst8_c }, // DST_FLIPADST = 14 + { iadst8_c, idst8_c }, // FLIPADST_DST = 15 +#endif // CONFIG_EXT_TX + }; + int i, j; tran_low_t out[8 * 8]; tran_low_t *outptr = out; @@ -152,15 +345,29 @@ void vp10_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride, } } -static const transform_2d IHT_16[] = { - { idct16_c, idct16_c }, // DCT_DCT = 0 - { iadst16_c, idct16_c }, // ADST_DCT = 1 - { idct16_c, iadst16_c }, // DCT_ADST = 2 - { iadst16_c, iadst16_c } // ADST_ADST = 3 -}; - void vp10_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride, int tx_type) { + static const transform_2d IHT_16[] = { + { idct16_c, idct16_c }, // DCT_DCT = 0 + { iadst16_c, idct16_c }, // ADST_DCT = 1 + { idct16_c, iadst16_c }, // DCT_ADST = 2 + { iadst16_c, iadst16_c }, // ADST_ADST = 3 +#if CONFIG_EXT_TX + { iadst16_c, idct16_c }, // FLIPADST_DCT = 4 + { idct16_c, iadst16_c }, // DCT_FLIPADST = 5 + { iadst16_c, iadst16_c }, // FLIPADST_FLIPADST = 6 + { iadst16_c, iadst16_c }, // ADST_FLIPADST = 7 + { iadst16_c, iadst16_c }, // FLIPADST_ADST = 8 + { idst16_c, idst16_c }, // DST_DST = 9 + { idst16_c, idct16_c }, // DST_DCT = 10 + { idct16_c, idst16_c }, // DCT_DST = 11 + { idst16_c, iadst16_c }, // DST_ADST = 12 + { iadst16_c, idst16_c }, // ADST_DST = 13 + { idst16_c, iadst16_c }, // DST_FLIPADST = 14 + { iadst16_c, idst16_c }, // FLIPADST_DST = 15 +#endif // CONFIG_EXT_TX + }; + int i, j; tran_low_t out[16 * 16]; tran_low_t *outptr = out; @@ -286,6 +493,24 @@ void vp10_inv_txfm_add_4x4( vp10_iht4x4_16_add(input, dest, stride, ADST_ADST); flipud(dest, stride, 4); break; + case DST_DST: + case DST_DCT: + case DCT_DST: + case DST_ADST: + case ADST_DST: + // Use C version since DST only exists in C code + vp10_iht4x4_16_add_c(input, dest, stride, tx_type); + break; + case FLIPADST_DST: + flipud(dest, stride, 4); + vp10_iht4x4_16_add_c(input, dest, stride, ADST_DST); + flipud(dest, stride, 4); + break; + case DST_FLIPADST: + fliplr(dest, stride, 4); + vp10_iht4x4_16_add_c(input, dest, stride, DST_ADST); + fliplr(dest, stride, 4); + break; #endif // CONFIG_EXT_TX default: assert(0); @@ -330,6 +555,24 @@ void vp10_inv_txfm_add_8x8(const tran_low_t *input, uint8_t *dest, vp10_iht8x8_64_add(input, dest, stride, ADST_ADST); flipud(dest, stride, 8); break; + case DST_DST: + case DST_DCT: + case DCT_DST: + case DST_ADST: + case ADST_DST: + // Use C version since DST only exists in C code + vp10_iht8x8_64_add_c(input, dest, stride, tx_type); + break; + case FLIPADST_DST: + flipud(dest, stride, 8); + vp10_iht8x8_64_add_c(input, dest, stride, ADST_DST); + flipud(dest, stride, 8); + break; + case DST_FLIPADST: + fliplr(dest, stride, 8); + vp10_iht8x8_64_add_c(input, dest, stride, DST_ADST); + fliplr(dest, stride, 8); + break; #endif // CONFIG_EXT_TX default: assert(0); @@ -374,6 +617,24 @@ void vp10_inv_txfm_add_16x16(const tran_low_t *input, uint8_t *dest, vp10_iht16x16_256_add(input, dest, stride, ADST_ADST); flipud(dest, stride, 16); break; + case DST_DST: + case DST_DCT: + case DCT_DST: + case DST_ADST: + case ADST_DST: + // Use C version since DST only exists in C code + vp10_iht16x16_256_add_c(input, dest, stride, tx_type); + break; + case FLIPADST_DST: + flipud(dest, stride, 16); + vp10_iht16x16_256_add_c(input, dest, stride, ADST_DST); + flipud(dest, stride, 16); + break; + case DST_FLIPADST: + fliplr(dest, stride, 16); + vp10_iht16x16_256_add_c(input, dest, stride, DST_ADST); + fliplr(dest, stride, 16); + break; #endif // CONFIG_EXT_TX default: assert(0); @@ -402,10 +663,24 @@ void vp10_inv_txfm_add_32x32(const tran_low_t *input, uint8_t *dest, void vp10_highbd_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, int stride, int tx_type, int bd) { const highbd_transform_2d IHT_4[] = { - { vpx_highbd_idct4_c, vpx_highbd_idct4_c }, // DCT_DCT = 0 - { vpx_highbd_iadst4_c, vpx_highbd_idct4_c }, // ADST_DCT = 1 - { vpx_highbd_idct4_c, vpx_highbd_iadst4_c }, // DCT_ADST = 2 - { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c } // ADST_ADST = 3 + { vpx_highbd_idct4_c, vpx_highbd_idct4_c }, // DCT_DCT = 0 + { vpx_highbd_iadst4_c, vpx_highbd_idct4_c }, // ADST_DCT = 1 + { vpx_highbd_idct4_c, vpx_highbd_iadst4_c }, // DCT_ADST = 2 + { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c }, // ADST_ADST = 3 +#if CONFIG_EXT_TX + { vpx_highbd_iadst4_c, vpx_highbd_idct4_c }, // FLIPADST_DCT = 4 + { vpx_highbd_idct4_c, vpx_highbd_iadst4_c }, // DCT_FLIPADST = 5 + { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c }, // FLIPADST_FLIPADST = 6 + { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c }, // ADST_FLIPADST = 7 + { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c }, // FLIPADST_ADST = 8 + { highbd_idst4_c, highbd_idst4_c }, // DST_DST = 9 + { highbd_idst4_c, vpx_highbd_idct4_c }, // DST_DCT = 10 + { vpx_highbd_idct4_c, highbd_idst4_c }, // DCT_DST = 11 + { highbd_idst4_c, vpx_highbd_iadst4_c }, // DST_ADST = 12 + { vpx_highbd_iadst4_c, highbd_idst4_c }, // ADST_DST = 13 + { highbd_idst4_c, vpx_highbd_iadst4_c }, // DST_FLIPADST = 14 + { vpx_highbd_iadst4_c, highbd_idst4_c }, // FLIPADST_DST = 15 +#endif // CONFIG_EXT_TX }; uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); @@ -433,15 +708,29 @@ void vp10_highbd_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, } } -static const highbd_transform_2d HIGH_IHT_8[] = { - { vpx_highbd_idct8_c, vpx_highbd_idct8_c }, // DCT_DCT = 0 - { vpx_highbd_iadst8_c, vpx_highbd_idct8_c }, // ADST_DCT = 1 - { vpx_highbd_idct8_c, vpx_highbd_iadst8_c }, // DCT_ADST = 2 - { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c } // ADST_ADST = 3 -}; - void vp10_highbd_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest8, int stride, int tx_type, int bd) { + static const highbd_transform_2d HIGH_IHT_8[] = { + { vpx_highbd_idct8_c, vpx_highbd_idct8_c }, // DCT_DCT = 0 + { vpx_highbd_iadst8_c, vpx_highbd_idct8_c }, // ADST_DCT = 1 + { vpx_highbd_idct8_c, vpx_highbd_iadst8_c }, // DCT_ADST = 2 + { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c }, // ADST_ADST = 3 +#if CONFIG_EXT_TX + { vpx_highbd_iadst8_c, vpx_highbd_idct8_c }, // FLIPADST_DCT = 4 + { vpx_highbd_idct8_c, vpx_highbd_iadst8_c }, // DCT_FLIPADST = 5 + { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c }, // FLIPADST_FLIPADST = 6 + { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c }, // ADST_FLIPADST = 7 + { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c }, // FLIPADST_ADST = 8 + { highbd_idst8_c, highbd_idst8_c }, // DST_DST = 9 + { highbd_idst8_c, vpx_highbd_idct8_c }, // DST_DCT = 10 + { vpx_highbd_idct8_c, highbd_idst8_c }, // DCT_DST = 11 + { highbd_idst8_c, vpx_highbd_iadst8_c }, // DST_ADST = 12 + { vpx_highbd_iadst8_c, highbd_idst8_c }, // ADST_DST = 13 + { highbd_idst8_c, vpx_highbd_iadst8_c }, // DST_FLIPADST = 14 + { vpx_highbd_iadst8_c, highbd_idst8_c }, // FLIPADST_DST = 15 +#endif // CONFIG_EXT_TX + }; + int i, j; tran_low_t out[8 * 8]; tran_low_t *outptr = out; @@ -468,15 +757,29 @@ void vp10_highbd_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest8, } } -static const highbd_transform_2d HIGH_IHT_16[] = { - { vpx_highbd_idct16_c, vpx_highbd_idct16_c }, // DCT_DCT = 0 - { vpx_highbd_iadst16_c, vpx_highbd_idct16_c }, // ADST_DCT = 1 - { vpx_highbd_idct16_c, vpx_highbd_iadst16_c }, // DCT_ADST = 2 - { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c } // ADST_ADST = 3 -}; - void vp10_highbd_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest8, int stride, int tx_type, int bd) { + static const highbd_transform_2d HIGH_IHT_16[] = { + { vpx_highbd_idct16_c, vpx_highbd_idct16_c }, // DCT_DCT = 0 + { vpx_highbd_iadst16_c, vpx_highbd_idct16_c }, // ADST_DCT = 1 + { vpx_highbd_idct16_c, vpx_highbd_iadst16_c }, // DCT_ADST = 2 + { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c }, // ADST_ADST = 3 +#if CONFIG_EXT_TX + { vpx_highbd_iadst16_c, vpx_highbd_idct16_c }, // FLIPADST_DCT = 4 + { vpx_highbd_idct16_c, vpx_highbd_iadst16_c }, // DCT_FLIPADST = 5 + { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c }, // FLIPADST_FLIPADST = 6 + { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c }, // ADST_FLIPADST = 7 + { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c }, // FLIPADST_ADST = 8 + { highbd_idst16_c, highbd_idst16_c }, // DST_DST = 9 + { highbd_idst16_c, vpx_highbd_idct16_c }, // DST_DCT = 10 + { vpx_highbd_idct16_c, highbd_idst16_c }, // DCT_DST = 11 + { highbd_idst16_c, vpx_highbd_iadst16_c }, // DST_ADST = 12 + { vpx_highbd_iadst16_c, highbd_idst16_c }, // ADST_DST = 13 + { highbd_idst16_c, vpx_highbd_iadst16_c }, // DST_FLIPADST = 14 + { vpx_highbd_iadst16_c, highbd_idst16_c }, // FLIPADST_DST = 15 +#endif // CONFIG_EXT_TX + }; + int i, j; tran_low_t out[16 * 16]; tran_low_t *outptr = out; @@ -606,6 +909,24 @@ void vp10_highbd_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest, vp10_highbd_iht4x4_16_add(input, dest, stride, ADST_ADST, bd); flipud16(CONVERT_TO_SHORTPTR(dest), stride, 4); break; + case DST_DST: + case DST_DCT: + case DCT_DST: + case DST_ADST: + case ADST_DST: + // Use C version since DST only exists in C code + vp10_highbd_iht4x4_16_add_c(input, dest, stride, tx_type, bd); + break; + case FLIPADST_DST: + flipud16(CONVERT_TO_SHORTPTR(dest), stride, 4); + vp10_highbd_iht4x4_16_add_c(input, dest, stride, ADST_DST, bd); + flipud16(CONVERT_TO_SHORTPTR(dest), stride, 4); + break; + case DST_FLIPADST: + fliplr16(CONVERT_TO_SHORTPTR(dest), stride, 4); + vp10_highbd_iht4x4_16_add_c(input, dest, stride, DST_ADST, bd); + fliplr16(CONVERT_TO_SHORTPTR(dest), stride, 4); + break; #endif // CONFIG_EXT_TX default: assert(0); @@ -651,6 +972,24 @@ void vp10_highbd_inv_txfm_add_8x8(const tran_low_t *input, uint8_t *dest, vp10_highbd_iht8x8_64_add(input, dest, stride, ADST_ADST, bd); flipud16(CONVERT_TO_SHORTPTR(dest), stride, 8); break; + case DST_DST: + case DST_DCT: + case DCT_DST: + case DST_ADST: + case ADST_DST: + // Use C version since DST only exists in C code + vp10_highbd_iht8x8_64_add_c(input, dest, stride, tx_type, bd); + break; + case FLIPADST_DST: + flipud16(CONVERT_TO_SHORTPTR(dest), stride, 8); + vp10_highbd_iht8x8_64_add_c(input, dest, stride, ADST_DST, bd); + flipud16(CONVERT_TO_SHORTPTR(dest), stride, 8); + break; + case DST_FLIPADST: + fliplr16(CONVERT_TO_SHORTPTR(dest), stride, 8); + vp10_highbd_iht8x8_64_add_c(input, dest, stride, DST_ADST, bd); + fliplr16(CONVERT_TO_SHORTPTR(dest), stride, 8); + break; #endif // CONFIG_EXT_TX default: assert(0); @@ -696,6 +1035,24 @@ void vp10_highbd_inv_txfm_add_16x16(const tran_low_t *input, uint8_t *dest, vp10_highbd_iht16x16_256_add(input, dest, stride, ADST_ADST, bd); flipud16(CONVERT_TO_SHORTPTR(dest), stride, 16); break; + case DST_DST: + case DST_DCT: + case DCT_DST: + case DST_ADST: + case ADST_DST: + // Use C version since DST only exists in C code + vp10_highbd_iht16x16_256_add_c(input, dest, stride, tx_type, bd); + break; + case FLIPADST_DST: + flipud16(CONVERT_TO_SHORTPTR(dest), stride, 16); + vp10_highbd_iht16x16_256_add_c(input, dest, stride, ADST_DST, bd); + flipud16(CONVERT_TO_SHORTPTR(dest), stride, 16); + break; + case DST_FLIPADST: + fliplr16(CONVERT_TO_SHORTPTR(dest), stride, 16); + vp10_highbd_iht16x16_256_add_c(input, dest, stride, DST_ADST, bd); + fliplr16(CONVERT_TO_SHORTPTR(dest), stride, 16); + break; #endif // CONFIG_EXT_TX default: assert(0); diff --git a/vp10/common/scan.c b/vp10/common/scan.c index 4cb78e880..57095d966 100644 --- a/vp10/common/scan.c +++ b/vp10/common/scan.c @@ -696,7 +696,6 @@ DECLARE_ALIGNED(16, static const int16_t, vp10_default_iscan_32x32[1024]) = { }; #if CONFIG_EXT_TX - const scan_order vp10_intra_scan_orders[TX_SIZES][TX_TYPES] = { { // TX_4X4 {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, @@ -708,6 +707,13 @@ const scan_order vp10_intra_scan_orders[TX_SIZES][TX_TYPES] = { {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, + {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, + {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, + {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, + {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, + {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, + {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, + {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, }, { // TX_8X8 {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, {row_scan_8x8, vp10_row_iscan_8x8, row_scan_8x8_neighbors}, @@ -718,6 +724,13 @@ const scan_order vp10_intra_scan_orders[TX_SIZES][TX_TYPES] = { {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, + {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, + {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, + {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, + {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, + {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, + {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, + {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, }, { // TX_16X16 {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors}, {row_scan_16x16, vp10_row_iscan_16x16, row_scan_16x16_neighbors}, @@ -728,6 +741,13 @@ const scan_order vp10_intra_scan_orders[TX_SIZES][TX_TYPES] = { {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors}, {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors}, {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors}, + {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors}, + {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors}, + {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors}, + {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors}, + {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors}, + {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors}, + {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors}, }, { // TX_32X32 {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors}, {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors}, @@ -738,6 +758,13 @@ const scan_order vp10_intra_scan_orders[TX_SIZES][TX_TYPES] = { {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors}, {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors}, {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors}, + {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors}, + {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors}, + {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors}, + {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors}, + {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors}, + {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors}, + {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors}, } }; @@ -752,6 +779,13 @@ const scan_order vp10_inter_scan_orders[TX_SIZES][TX_TYPES] = { {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, + {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, + {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, + {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, + {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, + {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, + {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, + {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, }, { // TX_8X8 {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, @@ -762,6 +796,13 @@ const scan_order vp10_inter_scan_orders[TX_SIZES][TX_TYPES] = { {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, + {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, + {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, + {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, + {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, + {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, + {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, + {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, }, { // TX_16X16 {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors}, {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors}, @@ -772,6 +813,13 @@ const scan_order vp10_inter_scan_orders[TX_SIZES][TX_TYPES] = { {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors}, {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors}, {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors}, + {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors}, + {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors}, + {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors}, + {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors}, + {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors}, + {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors}, + {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors}, }, { // TX_32X32 {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors}, {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors}, @@ -782,6 +830,13 @@ const scan_order vp10_inter_scan_orders[TX_SIZES][TX_TYPES] = { {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors}, {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors}, {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors}, + {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors}, + {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors}, + {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors}, + {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors}, + {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors}, + {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors}, + {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors}, } }; @@ -810,5 +865,4 @@ const scan_order vp10_intra_scan_orders[TX_SIZES][TX_TYPES] = { {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors}, } }; - #endif // CONFIG_EXT_TX diff --git a/vp10/encoder/dct.c b/vp10/encoder/dct.c index 288d5d092..cb1ede253 100644 --- a/vp10/encoder/dct.c +++ b/vp10/encoder/dct.c @@ -20,6 +20,84 @@ #include "vpx_dsp/fwd_txfm.h" #include "vpx_ports/mem.h" +#if CONFIG_EXT_TX +void fdst4(const tran_low_t *input, tran_low_t *output) { + static const int N = 4; + static const int sinvalue_lookup_table[] = { + 9630, 15582 + }; + static const int mult = 14654; // sqrt(4/5) + int i, j; + for (i = 0; i < N; i++) { + int64_t sum = 0; + for (j = 0; j < N; j++) { + int idx = (i + 1) * (j + 1); + int sign = 0; + if (idx > N + 1) { + sign = (idx / (N + 1)) & 1; + idx %= (N + 1); + } + idx = idx > N + 1 - idx ? N + 1 - idx : idx; + if (idx == 0) continue; + idx--; + sum += (int64_t)input[j] * sinvalue_lookup_table[idx] * (sign ? -1 : 1); + } + output[i] = (sum * mult) >> (2 * DCT_CONST_BITS); + } +} + +void fdst8(const tran_low_t *input, tran_low_t *output) { + static const int N = 8; + static const int sinvalue_lookup_table[] = { + 5604, 10531, 14189, 16135 + }; + static const int mult = 15447; // 2*sqrt(2/9) + int i, j; + for (i = 0; i < N; i++) { + int64_t sum = 0; + for (j = 0; j < N; j++) { + int idx = (i + 1) * (j + 1); + int sign = 0; + if (idx > N + 1) { + sign = (idx / (N + 1)) & 1; + idx %= (N + 1); + } + idx = idx > N + 1 - idx ? N + 1 - idx : idx; + if (idx == 0) continue; + idx--; + sum += (int64_t)input[j] * sinvalue_lookup_table[idx] * (sign ? -1 : 1); + } + output[i] = (sum * mult) >> (2 * DCT_CONST_BITS); + } +} + +void fdst16(const tran_low_t *input, tran_low_t *output) { + static const int N = 16; + static const int sinvalue_lookup_table[] = { + 3011, 5919, 8625, 11038, + 13075, 14666, 15759, 16314 + }; + static const int mult = 15895; // 2*sqrt(4/17) + int i, j; + for (i = 0; i < N; i++) { + int64_t sum = 0; + for (j = 0; j < N; j++) { + int idx = (i + 1) * (j + 1); + int sign = 0; + if (idx > N + 1) { + sign = (idx / (N + 1)) & 1; + idx %= (N + 1); + } + idx = idx > N + 1 - idx ? N + 1 - idx : idx; + if (idx == 0) continue; + idx--; + sum += (int64_t)input[j] * sinvalue_lookup_table[idx] * (sign ? -1 : 1); + } + output[i] = (sum * mult) >> (2 * DCT_CONST_BITS); + } +} +#endif // CONFIG_EXT_TX + static void fdct4(const tran_low_t *input, tran_low_t *output) { tran_high_t step[4]; tran_high_t temp1, temp2; @@ -510,25 +588,67 @@ static const transform_2d FHT_4[] = { { fdct4, fdct4 }, // DCT_DCT = 0 { fadst4, fdct4 }, // ADST_DCT = 1 { fdct4, fadst4 }, // DCT_ADST = 2 - { fadst4, fadst4 } // ADST_ADST = 3 + { fadst4, fadst4 }, // ADST_ADST = 3 +#if CONFIG_EXT_TX + { fadst4, fdct4 }, // FLIPADST_DCT = 4 + { fdct4, fadst4 }, // DCT_FLIPADST = 5 + { fadst4, fadst4 }, // FLIPADST_FLIPADST = 6 + { fadst4, fadst4 }, // ADST_FLIPADST = 7 + { fadst4, fadst4 }, // FLIPADST_ADST = 8 + { fdst4, fdst4 }, // DST_DST = 9 + { fdst4, fdct4 }, // DST_DCT = 10 + { fdct4, fdst4 }, // DCT_DST = 11 + { fdst4, fadst4 }, // DST_ADST = 12 + { fadst4, fdst4 }, // ADST_DST = 13 + { fdst4, fadst4 }, // DST_FLIPADST = 14 + { fadst4, fdst4 }, // FLIPADST_DST = 15 +#endif // CONFIG_EXT_TX }; static const transform_2d FHT_8[] = { { fdct8, fdct8 }, // DCT_DCT = 0 { fadst8, fdct8 }, // ADST_DCT = 1 { fdct8, fadst8 }, // DCT_ADST = 2 - { fadst8, fadst8 } // ADST_ADST = 3 + { fadst8, fadst8 }, // ADST_ADST = 3 +#if CONFIG_EXT_TX + { fadst8, fdct8 }, // FLIPADST_DCT = 4 + { fdct8, fadst8 }, // DCT_FLIPADST = 5 + { fadst8, fadst8 }, // FLIPADST_FLIPADST = 6 + { fadst8, fadst8 }, // ADST_FLIPADST = 7 + { fadst8, fadst8 }, // FLIPADST_ADST = 8 + { fdst8, fdst8 }, // DST_DST = 9 + { fdst8, fdct8 }, // DST_DCT = 10 + { fdct8, fdst8 }, // DCT_DST = 11 + { fdst8, fadst8 }, // DST_ADST = 12 + { fadst8, fdst8 }, // ADST_DST = 13 + { fdst8, fadst8 }, // DST_FLIPADST = 14 + { fadst8, fdst8 }, // FLIPADST_DST = 15 +#endif // CONFIG_EXT_TX }; static const transform_2d FHT_16[] = { { fdct16, fdct16 }, // DCT_DCT = 0 { fadst16, fdct16 }, // ADST_DCT = 1 { fdct16, fadst16 }, // DCT_ADST = 2 - { fadst16, fadst16 } // ADST_ADST = 3 + { fadst16, fadst16 }, // ADST_ADST = 3 +#if CONFIG_EXT_TX + { fadst16, fdct16 }, // FLIPADST_DCT = 4 + { fdct16, fadst16 }, // DCT_FLIPADST = 5 + { fadst16, fadst16 }, // FLIPADST_FLIPADST = 6 + { fadst16, fadst16 }, // ADST_FLIPADST = 7 + { fadst16, fadst16 }, // FLIPADST_ADST = 8 + { fdst16, fdst16 }, // DST_DST = 9 + { fdst16, fdct16 }, // DST_DCT = 10 + { fdct16, fdst16 }, // DCT_DST = 11 + { fdst16, fadst16 }, // DST_ADST = 12 + { fadst16, fdst16 }, // ADST_DST = 13 + { fdst16, fadst16 }, // DST_FLIPADST = 14 + { fadst16, fdst16 }, // FLIPADST_DST = 15 +#endif // CONFIG_EXT_TX }; void vp10_fht4x4_c(const int16_t *input, tran_low_t *output, - int stride, int tx_type) { + int stride, int tx_type) { if (tx_type == DCT_DCT) { vpx_fdct4x4_c(input, output, stride); } else { @@ -560,15 +680,15 @@ void vp10_fht4x4_c(const int16_t *input, tran_low_t *output, } void vp10_fdct8x8_quant_c(const int16_t *input, int stride, - tran_low_t *coeff_ptr, intptr_t n_coeffs, - int skip_block, - const int16_t *zbin_ptr, const int16_t *round_ptr, - const int16_t *quant_ptr, - const int16_t *quant_shift_ptr, - tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, - const int16_t *dequant_ptr, - uint16_t *eob_ptr, - const int16_t *scan, const int16_t *iscan) { + tran_low_t *coeff_ptr, intptr_t n_coeffs, + int skip_block, + const int16_t *zbin_ptr, const int16_t *round_ptr, + const int16_t *quant_ptr, + const int16_t *quant_shift_ptr, + tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, + const int16_t *dequant_ptr, + uint16_t *eob_ptr, + const int16_t *scan, const int16_t *iscan) { int eob = -1; int i, j; @@ -672,7 +792,7 @@ void vp10_fdct8x8_quant_c(const int16_t *input, int stride, } void vp10_fht8x8_c(const int16_t *input, tran_low_t *output, - int stride, int tx_type) { + int stride, int tx_type) { if (tx_type == DCT_DCT) { vpx_fdct8x8_c(input, output, stride); } else { @@ -758,7 +878,7 @@ void vp10_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride) { } void vp10_fht16x16_c(const int16_t *input, tran_low_t *output, - int stride, int tx_type) { + int stride, int tx_type) { if (tx_type == DCT_DCT) { vpx_fdct16x16_c(input, output, stride); } else { diff --git a/vp10/encoder/encodemb.c b/vp10/encoder/encodemb.c index 561835f6a..418a62958 100644 --- a/vp10/encoder/encodemb.c +++ b/vp10/encoder/encodemb.c @@ -424,6 +424,22 @@ void vp10_fwd_txfm_4x4(const int16_t *src_diff, copy_flipud(src_diff, diff_stride, 4, src_diff2, 4); vp10_fht4x4(src_diff2, coeff, 4, ADST_ADST); break; + case DST_DST: + case DCT_DST: + case DST_DCT: + case DST_ADST: + case ADST_DST: + // Use C version since DST exists only in C + vp10_fht4x4_c(src_diff, coeff, diff_stride, tx_type); + break; + case DST_FLIPADST: + copy_fliplr(src_diff, diff_stride, 4, src_diff2, 4); + vp10_fht4x4_c(src_diff2, coeff, 4, DST_ADST); + break; + case FLIPADST_DST: + copy_flipud(src_diff, diff_stride, 4, src_diff2, 4); + vp10_fht4x4_c(src_diff2, coeff, 4, ADST_DST); + break; #endif // CONFIG_EXT_TX default: assert(0); @@ -464,6 +480,22 @@ static void fwd_txfm_8x8(const int16_t *src_diff, tran_low_t *coeff, copy_flipud(src_diff, diff_stride, 8, src_diff2, 8); vp10_fht8x8(src_diff2, coeff, 8, ADST_ADST); break; + case DST_DST: + case DCT_DST: + case DST_DCT: + case DST_ADST: + case ADST_DST: + // Use C version since DST exists only in C + vp10_fht8x8_c(src_diff, coeff, diff_stride, tx_type); + break; + case DST_FLIPADST: + copy_fliplr(src_diff, diff_stride, 8, src_diff2, 8); + vp10_fht8x8_c(src_diff2, coeff, 8, DST_ADST); + break; + case FLIPADST_DST: + copy_flipud(src_diff, diff_stride, 8, src_diff2, 8); + vp10_fht8x8_c(src_diff2, coeff, 8, ADST_DST); + break; #endif // CONFIG_EXT_TX default: assert(0); @@ -504,6 +536,22 @@ static void fwd_txfm_8x8_1(const int16_t *src_diff, tran_low_t *coeff, copy_flipud(src_diff, diff_stride, 8, src_diff2, 8); vp10_fht8x8(src_diff2, coeff, 8, ADST_ADST); break; + case DST_DST: + case DCT_DST: + case DST_DCT: + case DST_ADST: + case ADST_DST: + // Use C version since DST exists only in C + vp10_fht8x8_c(src_diff, coeff, diff_stride, tx_type); + break; + case DST_FLIPADST: + copy_fliplr(src_diff, diff_stride, 8, src_diff2, 8); + vp10_fht8x8_c(src_diff2, coeff, 8, DST_ADST); + break; + case FLIPADST_DST: + copy_flipud(src_diff, diff_stride, 8, src_diff2, 8); + vp10_fht8x8_c(src_diff2, coeff, 8, ADST_DST); + break; #endif // CONFIG_EXT_TX default: assert(0); @@ -544,6 +592,22 @@ static void fwd_txfm_16x16(const int16_t *src_diff, tran_low_t *coeff, copy_flipud(src_diff, diff_stride, 16, src_diff2, 16); vp10_fht16x16(src_diff2, coeff, 16, ADST_ADST); break; + case DST_DST: + case DCT_DST: + case DST_DCT: + case DST_ADST: + case ADST_DST: + // Use C version since DST exists only in C + vp10_fht16x16_c(src_diff, coeff, diff_stride, tx_type); + break; + case DST_FLIPADST: + copy_fliplr(src_diff, diff_stride, 16, src_diff2, 16); + vp10_fht16x16_c(src_diff2, coeff, 16, DST_ADST); + break; + case FLIPADST_DST: + copy_flipud(src_diff, diff_stride, 16, src_diff2, 16); + vp10_fht16x16_c(src_diff2, coeff, 16, ADST_DST); + break; #endif // CONFIG_EXT_TX default: assert(0); @@ -584,6 +648,22 @@ static void fwd_txfm_16x16_1(const int16_t *src_diff, tran_low_t *coeff, copy_flipud(src_diff, diff_stride, 16, src_diff2, 16); vp10_fht16x16(src_diff2, coeff, 16, ADST_ADST); break; + case DST_DST: + case DCT_DST: + case DST_DCT: + case DST_ADST: + case ADST_DST: + // Use C version since DST exists only in C + vp10_fht16x16_c(src_diff, coeff, diff_stride, tx_type); + break; + case DST_FLIPADST: + copy_fliplr(src_diff, diff_stride, 16, src_diff2, 16); + vp10_fht16x16_c(src_diff2, coeff, 16, DST_ADST); + break; + case FLIPADST_DST: + copy_flipud(src_diff, diff_stride, 16, src_diff2, 16); + vp10_fht16x16_c(src_diff2, coeff, 16, ADST_DST); + break; #endif // CONFIG_EXT_TX default: assert(0); @@ -665,6 +745,22 @@ void vp10_highbd_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff, copy_flipud(src_diff, diff_stride, 4, src_diff2, 4); vp10_highbd_fht4x4(src_diff2, coeff, 4, ADST_ADST); break; + case DST_DST: + case DCT_DST: + case DST_DCT: + case DST_ADST: + case ADST_DST: + // Use C version since DST exists only in C + vp10_highbd_fht4x4_c(src_diff, coeff, diff_stride, tx_type); + break; + case DST_FLIPADST: + copy_fliplr(src_diff, diff_stride, 4, src_diff2, 4); + vp10_highbd_fht4x4_c(src_diff2, coeff, 4, DST_ADST); + break; + case FLIPADST_DST: + copy_flipud(src_diff, diff_stride, 4, src_diff2, 4); + vp10_highbd_fht4x4_c(src_diff2, coeff, 4, ADST_DST); + break; #endif // CONFIG_EXT_TX default: assert(0); @@ -707,6 +803,22 @@ static void highbd_fwd_txfm_8x8(const int16_t *src_diff, tran_low_t *coeff, copy_flipud(src_diff, diff_stride, 8, src_diff2, 8); vp10_highbd_fht8x8(src_diff2, coeff, 8, ADST_ADST); break; + case DST_DST: + case DCT_DST: + case DST_DCT: + case DST_ADST: + case ADST_DST: + // Use C version since DST exists only in C + vp10_highbd_fht8x8_c(src_diff, coeff, diff_stride, tx_type); + break; + case DST_FLIPADST: + copy_fliplr(src_diff, diff_stride, 8, src_diff2, 8); + vp10_highbd_fht8x8_c(src_diff2, coeff, 8, DST_ADST); + break; + case FLIPADST_DST: + copy_flipud(src_diff, diff_stride, 8, src_diff2, 8); + vp10_highbd_fht8x8_c(src_diff2, coeff, 8, ADST_DST); + break; #endif // CONFIG_EXT_TX default: assert(0); @@ -749,6 +861,22 @@ static void highbd_fwd_txfm_8x8_1(const int16_t *src_diff, tran_low_t *coeff, copy_flipud(src_diff, diff_stride, 8, src_diff2, 8); vp10_highbd_fht8x8(src_diff2, coeff, 8, ADST_ADST); break; + case DST_DST: + case DCT_DST: + case DST_DCT: + case DST_ADST: + case ADST_DST: + // Use C version since DST exists only in C + vp10_highbd_fht8x8_c(src_diff, coeff, diff_stride, tx_type); + break; + case DST_FLIPADST: + copy_fliplr(src_diff, diff_stride, 8, src_diff2, 8); + vp10_highbd_fht8x8_c(src_diff2, coeff, 8, DST_ADST); + break; + case FLIPADST_DST: + copy_flipud(src_diff, diff_stride, 8, src_diff2, 8); + vp10_highbd_fht8x8_c(src_diff2, coeff, 8, ADST_DST); + break; #endif // CONFIG_EXT_TX default: assert(0); @@ -791,6 +919,22 @@ static void highbd_fwd_txfm_16x16(const int16_t *src_diff, tran_low_t *coeff, copy_flipud(src_diff, diff_stride, 16, src_diff2, 16); vp10_highbd_fht16x16(src_diff2, coeff, 16, ADST_ADST); break; + case DST_DST: + case DCT_DST: + case DST_DCT: + case DST_ADST: + case ADST_DST: + // Use C version since DST exists only in C + vp10_highbd_fht16x16_c(src_diff, coeff, diff_stride, tx_type); + break; + case DST_FLIPADST: + copy_fliplr(src_diff, diff_stride, 16, src_diff2, 16); + vp10_highbd_fht16x16_c(src_diff2, coeff, 16, DST_ADST); + break; + case FLIPADST_DST: + copy_flipud(src_diff, diff_stride, 16, src_diff2, 16); + vp10_highbd_fht16x16_c(src_diff2, coeff, 16, ADST_DST); + break; #endif // CONFIG_EXT_TX default: assert(0); @@ -833,6 +977,22 @@ static void highbd_fwd_txfm_16x16_1(const int16_t *src_diff, tran_low_t *coeff, copy_flipud(src_diff, diff_stride, 16, src_diff2, 16); vp10_highbd_fht16x16(src_diff2, coeff, 16, ADST_ADST); break; + case DST_DST: + case DCT_DST: + case DST_DCT: + case DST_ADST: + case ADST_DST: + // Use C version since DST exists only in C + vp10_highbd_fht16x16_c(src_diff, coeff, diff_stride, tx_type); + break; + case DST_FLIPADST: + copy_fliplr(src_diff, diff_stride, 16, src_diff2, 16); + vp10_highbd_fht16x16_c(src_diff2, coeff, 16, DST_ADST); + break; + case FLIPADST_DST: + copy_flipud(src_diff, diff_stride, 16, src_diff2, 16); + vp10_highbd_fht16x16_c(src_diff2, coeff, 16, ADST_DST); + break; #endif // CONFIG_EXT_TX default: assert(0);