From: Debargha Mukherjee Date: Thu, 3 Sep 2015 09:58:12 +0000 (-0700) Subject: Backport EXT_TX experiment from nextgen X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=9fc691efbe1d810b94ffeccefcd830436db33169;p=libvpx Backport EXT_TX experiment from nextgen Does not include DST1 yet. derflr: +1.437 (8-bit internal), +7.243 (12-bit internal) with --enable-ext-tx Change-Id: I91f1759fd2de794755eb6384cda52e80e979cb7d --- diff --git a/vp10/common/blockd.h b/vp10/common/blockd.h index 09406a32a..70983ca72 100644 --- a/vp10/common/blockd.h +++ b/vp10/common/blockd.h @@ -78,6 +78,9 @@ typedef struct { // Only for INTER blocks INTERP_FILTER interp_filter; MV_REFERENCE_FRAME ref_frame[2]; +#if CONFIG_EXT_TX + EXT_TX_TYPE ext_txfrm; +#endif // TODO(slavarnway): Delete and use bmi[3].as_mv[] instead. int_mv mv[2]; @@ -218,14 +221,50 @@ static const TX_TYPE intra_mode_to_tx_type_lookup[INTRA_MODES] = { ADST_ADST, // TM }; -static INLINE TX_TYPE get_tx_type(PLANE_TYPE plane_type, const MACROBLOCKD *xd, +#if CONFIG_EXT_TX +#define GET_EXT_TX_TYPES(tx_size) \ + ((tx_size) >= TX_32X32 ? 1 : EXT_TX_TYPES) +#define GET_EXT_TX_TREE(tx_size) \ + ((tx_size) >= TX_32X32 ? NULL : vp10_ext_tx_tree) +#define GET_EXT_TX_ENCODINGS(tx_size) \ + ((tx_size) >= TX_32X32 ? NULL : ext_tx_encodings) + +static TX_TYPE ext_tx_to_txtype[EXT_TX_TYPES] = { + DCT_DCT, + ADST_ADST, + FLIPADST_FLIPADST, + ADST_FLIPADST, + FLIPADST_ADST, + ADST_DCT, + DCT_ADST, + FLIPADST_DCT, + DCT_FLIPADST, +}; +#endif // CONFIG_EXT_TX + +static INLINE TX_TYPE get_tx_type_large(PLANE_TYPE plane_type, + const MACROBLOCKD *xd) { + (void) plane_type; + (void) xd; + return DCT_DCT; +} + +static INLINE TX_TYPE get_tx_type(PLANE_TYPE plane_type, + const MACROBLOCKD *xd, int block_idx, TX_SIZE tx_size) { const MODE_INFO *const mi = xd->mi[0]; const MB_MODE_INFO *const mbmi = &mi->mbmi; #if CONFIG_EXT_TX - if (xd->lossless || is_inter_block(mbmi) || tx_size >= TX_32X32) + if (xd->lossless || tx_size >= TX_32X32) return DCT_DCT; + if (is_inter_block(&mi->mbmi)) { + if (plane_type == PLANE_TYPE_Y) { + return ext_tx_to_txtype[mi->mbmi.ext_txfrm]; + } else { + return DCT_DCT; + } + } return intra_mode_to_tx_type_lookup[plane_type == PLANE_TYPE_Y ? get_y_mode(mi, block_idx) : mbmi->uv_mode]; #else diff --git a/vp10/common/entropy.h b/vp10/common/entropy.h index fba7020a5..aedf5776b 100644 --- a/vp10/common/entropy.h +++ b/vp10/common/entropy.h @@ -22,6 +22,7 @@ extern "C" { #endif #define DIFF_UPDATE_PROB 252 +#define GROUP_DIFF_UPDATE_PROB 252 // Coefficient token alphabet #define ZERO_TOKEN 0 // 0 Extra Bits 0+0 diff --git a/vp10/common/entropymode.c b/vp10/common/entropymode.c index cdb6ab9ea..b9d5af9c7 100644 --- a/vp10/common/entropymode.c +++ b/vp10/common/entropymode.c @@ -314,6 +314,25 @@ static const vpx_prob default_switchable_interp_prob[SWITCHABLE_FILTER_CONTEXTS] { 149, 144, }, }; +#if CONFIG_EXT_TX +const vpx_tree_index vp10_ext_tx_tree[TREE_SIZE(EXT_TX_TYPES)] = { + -NORM, 2, + 4, 10, + 6, 8, + -ALT1, -ALT2, + -ALT3, -ALT4, + 12, 14, + -ALT5, -ALT6, + -ALT7, -ALT8, +}; + +static const vpx_prob default_ext_tx_prob[EXT_TX_SIZES][EXT_TX_TYPES - 1] = { + { 240, 128, 128, 128, 128, 128, 128, 128 }, + { 208, 128, 128, 128, 128, 128, 128, 128 }, + { 176, 128, 128, 128, 128, 128, 128, 128 }, +}; +#endif // CONFIG_EXT_TX + static void init_mode_probs(FRAME_CONTEXT *fc) { vp10_copy(fc->uv_mode_prob, default_if_uv_probs); vp10_copy(fc->y_mode_prob, default_if_y_probs); @@ -326,6 +345,9 @@ static void init_mode_probs(FRAME_CONTEXT *fc) { fc->tx_probs = default_tx_probs; vp10_copy(fc->skip_probs, default_skip_probs); vp10_copy(fc->inter_mode_probs, default_inter_mode_probs); +#if CONFIG_EXT_TX + vp10_copy(fc->ext_tx_prob, default_ext_tx_prob); +#endif // CONFIG_EXT_TX } const vpx_tree_index vp10_switchable_interp_tree @@ -405,6 +427,13 @@ void vp10_adapt_mode_probs(VP10_COMMON *cm) { for (i = 0; i < SKIP_CONTEXTS; ++i) fc->skip_probs[i] = mode_mv_merge_probs( pre_fc->skip_probs[i], counts->skip[i]); + +#if CONFIG_EXT_TX + for (i = TX_4X4; i <= TX_16X16; ++i) { + vpx_tree_merge_probs(vp10_ext_tx_tree, pre_fc->ext_tx_prob[i], + counts->ext_tx[i], fc->ext_tx_prob[i]); + } +#endif // CONFIG_EXT_TX } static void set_default_lf_deltas(struct loopfilter *lf) { diff --git a/vp10/common/entropymode.h b/vp10/common/entropymode.h index a8b749253..bdae9ee6f 100644 --- a/vp10/common/entropymode.h +++ b/vp10/common/entropymode.h @@ -56,6 +56,9 @@ typedef struct frame_contexts { struct tx_probs tx_probs; vpx_prob skip_probs[SKIP_CONTEXTS]; nmv_context nmvc; +#if CONFIG_EXT_TX + vpx_prob ext_tx_prob[EXT_TX_SIZES][EXT_TX_TYPES - 1]; +#endif // CONFIG_EXT_TX int initialized; } FRAME_CONTEXT; @@ -76,6 +79,9 @@ typedef struct FRAME_COUNTS { struct tx_counts tx; unsigned int skip[SKIP_CONTEXTS][2]; nmv_context_counts mv; +#if CONFIG_EXT_TX + unsigned int ext_tx[EXT_TX_SIZES][EXT_TX_TYPES]; +#endif // CONFIG_EXT_TX } FRAME_COUNTS; extern const vpx_prob vp10_kf_uv_mode_prob[INTRA_MODES][INTRA_MODES - 1]; @@ -100,6 +106,10 @@ void vp10_tx_counts_to_branch_counts_16x16(const unsigned int *tx_count_16x16p, void vp10_tx_counts_to_branch_counts_8x8(const unsigned int *tx_count_8x8p, unsigned int (*ct_8x8p)[2]); +#if CONFIG_EXT_TX +extern const vpx_tree_index vp10_ext_tx_tree[TREE_SIZE(EXT_TX_TYPES)]; +#endif // CONFIG_EXT_TX + #ifdef __cplusplus } // extern "C" #endif diff --git a/vp10/common/enums.h b/vp10/common/enums.h index a226a2d69..47c99df44 100644 --- a/vp10/common/enums.h +++ b/vp10/common/enums.h @@ -94,9 +94,33 @@ typedef enum { ADST_DCT = 1, // ADST in vertical, DCT in horizontal DCT_ADST = 2, // DCT in vertical, ADST in horizontal ADST_ADST = 3, // ADST in both directions - TX_TYPES = 4 + TX_TYPES, +#if CONFIG_EXT_TX + FLIPADST_DCT = 4, + DCT_FLIPADST = 5, + FLIPADST_FLIPADST = 6, + ADST_FLIPADST = 7, + FLIPADST_ADST = 8, + TOTAL_TX_TYPES, +#endif // CONFIG_EXT_TX } TX_TYPE; +#if CONFIG_EXT_TX +#define EXT_TX_SIZES 3 // number of sizes that use extended transforms +typedef enum { + NORM = 0, + ALT1 = 1, + ALT2 = 2, + ALT3 = 3, + ALT4 = 4, + ALT5 = 5, + ALT6 = 6, + ALT7 = 7, + ALT8 = 8, + EXT_TX_TYPES +} EXT_TX_TYPE; +#endif // CONFIG_EXT_TX + typedef enum { VP9_LAST_FLAG = 1 << 0, VP9_GOLD_FLAG = 1 << 1, diff --git a/vp10/common/idct.c b/vp10/common/idct.c index 144afc34b..6533690ef 100644 --- a/vp10/common/idct.c +++ b/vp10/common/idct.c @@ -57,6 +57,74 @@ static const transform_2d IHT_8[] = { { iadst8_c, iadst8_c } // ADST_ADST = 3 }; +#if CONFIG_EXT_TX +void fliplr(uint8_t *dest, int stride, int l) { + int i, j; + for (i = 0; i < l; ++i) { + for (j = 0; j < l / 2; ++j) { + const uint8_t tmp = dest[i * stride + j]; + dest[i * stride + j] = dest[i * stride + l - 1 - j]; + dest[i * stride + l - 1 - j] = tmp; + } + } +} + +void flipud(uint8_t *dest, int stride, int l) { + int i, j; + for (j = 0; j < l; ++j) { + for (i = 0; i < l / 2; ++i) { + const uint8_t tmp = dest[i * stride + j]; + dest[i * stride + j] = dest[(l - 1 - i) * stride + j]; + dest[(l - 1 - i) * stride + j] = tmp; + } + } +} + +void fliplrud(uint8_t *dest, int stride, int l) { + int i, j; + for (i = 0; i < l / 2; ++i) { + for (j = 0; j < l; ++j) { + const uint8_t tmp = dest[i * stride + j]; + dest[i * stride + j] = dest[(l - 1 - i) * stride + l - 1 - j]; + dest[(l - 1 - i) * stride + l - 1 - j] = tmp; + } + } +} + +void fliplr16(uint16_t *dest, int stride, int l) { + int i, j; + for (i = 0; i < l; ++i) { + for (j = 0; j < l / 2; ++j) { + const uint16_t tmp = dest[i * stride + j]; + dest[i * stride + j] = dest[i * stride + l - 1 - j]; + dest[i * stride + l - 1 - j] = tmp; + } + } +} + +void flipud16(uint16_t *dest, int stride, int l) { + int i, j; + for (j = 0; j < l; ++j) { + for (i = 0; i < l / 2; ++i) { + const uint16_t tmp = dest[i * stride + j]; + dest[i * stride + j] = dest[(l - 1 - i) * stride + j]; + dest[(l - 1 - i) * stride + j] = tmp; + } + } +} + +void fliplrud16(uint16_t *dest, int stride, int l) { + int i, j; + for (i = 0; i < l / 2; ++i) { + for (j = 0; j < l; ++j) { + const uint16_t tmp = dest[i * stride + j]; + dest[i * stride + j] = dest[(l - 1 - i) * stride + l - 1 - j]; + dest[(l - 1 - i) * stride + l - 1 - j] = tmp; + } + } +} +#endif // CONFIG_EXT_TX + void vp10_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride, int tx_type) { int i, j; @@ -178,10 +246,11 @@ void vp10_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride, vpx_idct32x32_1024_add(input, dest, stride); } -void vp10_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest, - int stride, int eob, TX_TYPE tx_type, - void (*itxm_add_4x4)(const tran_low_t *input, - uint8_t *dest, int stride, int eob)) { +void vp10_inv_txfm_add_4x4( + const tran_low_t *input, uint8_t *dest, + int stride, int eob, TX_TYPE tx_type, + void (*itxm_add_4x4)(const tran_low_t *input, + uint8_t *dest, int stride, int eob)) { switch (tx_type) { case DCT_DCT: itxm_add_4x4(input, dest, stride, eob); @@ -191,6 +260,33 @@ void vp10_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest, case ADST_ADST: vp10_iht4x4_16_add(input, dest, stride, tx_type); break; +#if CONFIG_EXT_TX + case FLIPADST_DCT: + flipud(dest, stride, 4); + vp10_iht4x4_16_add(input, dest, stride, ADST_DCT); + flipud(dest, stride, 4); + break; + case DCT_FLIPADST: + fliplr(dest, stride, 4); + vp10_iht4x4_16_add(input, dest, stride, DCT_ADST); + fliplr(dest, stride, 4); + break; + case FLIPADST_FLIPADST: + fliplrud(dest, stride, 4); + vp10_iht4x4_16_add(input, dest, stride, ADST_ADST); + fliplrud(dest, stride, 4); + break; + case ADST_FLIPADST: + fliplr(dest, stride, 4); + vp10_iht4x4_16_add(input, dest, stride, ADST_ADST); + fliplr(dest, stride, 4); + break; + case FLIPADST_ADST: + flipud(dest, stride, 4); + vp10_iht4x4_16_add(input, dest, stride, ADST_ADST); + flipud(dest, stride, 4); + break; +#endif // CONFIG_EXT_TX default: assert(0); break; @@ -208,6 +304,33 @@ void vp10_inv_txfm_add_8x8(const tran_low_t *input, uint8_t *dest, case ADST_ADST: vp10_iht8x8_64_add(input, dest, stride, tx_type); break; +#if CONFIG_EXT_TX + case FLIPADST_DCT: + flipud(dest, stride, 8); + vp10_iht8x8_64_add(input, dest, stride, ADST_DCT); + flipud(dest, stride, 8); + break; + case DCT_FLIPADST: + fliplr(dest, stride, 8); + vp10_iht8x8_64_add(input, dest, stride, DCT_ADST); + fliplr(dest, stride, 8); + break; + case FLIPADST_FLIPADST: + fliplrud(dest, stride, 8); + vp10_iht8x8_64_add(input, dest, stride, ADST_ADST); + fliplrud(dest, stride, 8); + break; + case ADST_FLIPADST: + fliplr(dest, stride, 8); + vp10_iht8x8_64_add(input, dest, stride, ADST_ADST); + fliplr(dest, stride, 8); + break; + case FLIPADST_ADST: + flipud(dest, stride, 8); + vp10_iht8x8_64_add(input, dest, stride, ADST_ADST); + flipud(dest, stride, 8); + break; +#endif // CONFIG_EXT_TX default: assert(0); break; @@ -225,6 +348,33 @@ void vp10_inv_txfm_add_16x16(const tran_low_t *input, uint8_t *dest, case ADST_ADST: vp10_iht16x16_256_add(input, dest, stride, tx_type); break; +#if CONFIG_EXT_TX + case FLIPADST_DCT: + flipud(dest, stride, 16); + vp10_iht16x16_256_add(input, dest, stride, ADST_DCT); + flipud(dest, stride, 16); + break; + case DCT_FLIPADST: + fliplr(dest, stride, 16); + vp10_iht16x16_256_add(input, dest, stride, DCT_ADST); + fliplr(dest, stride, 16); + break; + case FLIPADST_FLIPADST: + fliplrud(dest, stride, 16); + vp10_iht16x16_256_add(input, dest, stride, ADST_ADST); + fliplrud(dest, stride, 16); + break; + case ADST_FLIPADST: + fliplr(dest, stride, 16); + vp10_iht16x16_256_add(input, dest, stride, ADST_ADST); + fliplr(dest, stride, 16); + break; + case FLIPADST_ADST: + flipud(dest, stride, 16); + vp10_iht16x16_256_add(input, dest, stride, ADST_ADST); + flipud(dest, stride, 16); + break; +#endif // CONFIG_EXT_TX default: assert(0); break; @@ -430,6 +580,33 @@ void vp10_highbd_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest, case ADST_ADST: vp10_highbd_iht4x4_16_add(input, dest, stride, tx_type, bd); break; +#if CONFIG_EXT_TX + case FLIPADST_DCT: + flipud16(CONVERT_TO_SHORTPTR(dest), stride, 4); + vp10_highbd_iht4x4_16_add(input, dest, stride, ADST_DCT, bd); + flipud16(CONVERT_TO_SHORTPTR(dest), stride, 4); + break; + case DCT_FLIPADST: + fliplr16(CONVERT_TO_SHORTPTR(dest), stride, 4); + vp10_highbd_iht4x4_16_add(input, dest, stride, DCT_ADST, bd); + fliplr16(CONVERT_TO_SHORTPTR(dest), stride, 4); + break; + case FLIPADST_FLIPADST: + fliplrud16(CONVERT_TO_SHORTPTR(dest), stride, 4); + vp10_highbd_iht4x4_16_add(input, dest, stride, ADST_ADST, bd); + fliplrud16(CONVERT_TO_SHORTPTR(dest), stride, 4); + break; + case ADST_FLIPADST: + fliplr16(CONVERT_TO_SHORTPTR(dest), stride, 4); + vp10_highbd_iht4x4_16_add(input, dest, stride, ADST_ADST, bd); + fliplr16(CONVERT_TO_SHORTPTR(dest), stride, 4); + break; + case FLIPADST_ADST: + flipud16(CONVERT_TO_SHORTPTR(dest), stride, 4); + vp10_highbd_iht4x4_16_add(input, dest, stride, ADST_ADST, bd); + flipud16(CONVERT_TO_SHORTPTR(dest), stride, 4); + break; +#endif // CONFIG_EXT_TX default: assert(0); break; @@ -448,6 +625,33 @@ void vp10_highbd_inv_txfm_add_8x8(const tran_low_t *input, uint8_t *dest, case ADST_ADST: vp10_highbd_iht8x8_64_add(input, dest, stride, tx_type, bd); break; +#if CONFIG_EXT_TX + case FLIPADST_DCT: + flipud16(CONVERT_TO_SHORTPTR(dest), stride, 8); + vp10_highbd_iht8x8_64_add(input, dest, stride, ADST_DCT, bd); + flipud16(CONVERT_TO_SHORTPTR(dest), stride, 8); + break; + case DCT_FLIPADST: + fliplr16(CONVERT_TO_SHORTPTR(dest), stride, 8); + vp10_highbd_iht8x8_64_add(input, dest, stride, DCT_ADST, bd); + fliplr16(CONVERT_TO_SHORTPTR(dest), stride, 8); + break; + case FLIPADST_FLIPADST: + fliplrud16(CONVERT_TO_SHORTPTR(dest), stride, 8); + vp10_highbd_iht8x8_64_add(input, dest, stride, ADST_ADST, bd); + fliplrud16(CONVERT_TO_SHORTPTR(dest), stride, 8); + break; + case ADST_FLIPADST: + fliplr16(CONVERT_TO_SHORTPTR(dest), stride, 8); + vp10_highbd_iht8x8_64_add(input, dest, stride, ADST_ADST, bd); + fliplr16(CONVERT_TO_SHORTPTR(dest), stride, 8); + break; + case FLIPADST_ADST: + flipud16(CONVERT_TO_SHORTPTR(dest), stride, 8); + vp10_highbd_iht8x8_64_add(input, dest, stride, ADST_ADST, bd); + flipud16(CONVERT_TO_SHORTPTR(dest), stride, 8); + break; +#endif // CONFIG_EXT_TX default: assert(0); break; @@ -466,6 +670,33 @@ void vp10_highbd_inv_txfm_add_16x16(const tran_low_t *input, uint8_t *dest, case ADST_ADST: vp10_highbd_iht16x16_256_add(input, dest, stride, tx_type, bd); break; +#if CONFIG_EXT_TX + case FLIPADST_DCT: + flipud16(CONVERT_TO_SHORTPTR(dest), stride, 16); + vp10_highbd_iht16x16_256_add(input, dest, stride, ADST_DCT, bd); + flipud16(CONVERT_TO_SHORTPTR(dest), stride, 16); + break; + case DCT_FLIPADST: + fliplr16(CONVERT_TO_SHORTPTR(dest), stride, 16); + vp10_highbd_iht16x16_256_add(input, dest, stride, DCT_ADST, bd); + fliplr16(CONVERT_TO_SHORTPTR(dest), stride, 16); + break; + case FLIPADST_FLIPADST: + fliplrud16(CONVERT_TO_SHORTPTR(dest), stride, 16); + vp10_highbd_iht16x16_256_add(input, dest, stride, ADST_ADST, bd); + fliplrud16(CONVERT_TO_SHORTPTR(dest), stride, 16); + break; + case ADST_FLIPADST: + fliplr16(CONVERT_TO_SHORTPTR(dest), stride, 16); + vp10_highbd_iht16x16_256_add(input, dest, stride, ADST_ADST, bd); + fliplr16(CONVERT_TO_SHORTPTR(dest), stride, 16); + break; + case FLIPADST_ADST: + flipud16(CONVERT_TO_SHORTPTR(dest), stride, 16); + vp10_highbd_iht16x16_256_add(input, dest, stride, ADST_ADST, bd); + flipud16(CONVERT_TO_SHORTPTR(dest), stride, 16); + break; +#endif // CONFIG_EXT_TX default: assert(0); break; diff --git a/vp10/common/scan.c b/vp10/common/scan.c index 7217f6d04..eb5e58479 100644 --- a/vp10/common/scan.c +++ b/vp10/common/scan.c @@ -702,7 +702,7 @@ const scan_order vp10_default_scan_orders[TX_SIZES] = { {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors}, }; -const scan_order vp10_scan_orders[TX_SIZES][TX_TYPES] = { +const scan_order vp10_intra_scan_orders[TX_SIZES][TX_TYPES] = { { // TX_4X4 {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, {row_scan_4x4, vp10_row_iscan_4x4, row_scan_4x4_neighbors}, @@ -725,3 +725,49 @@ const scan_order vp10_scan_orders[TX_SIZES][TX_TYPES] = { {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors}, } }; + +#if CONFIG_EXT_TX +const scan_order vp10_inter_scan_orders[TX_SIZES][TOTAL_TX_TYPES] = { + { // TX_4X4 + {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, + {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, + {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, + {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, + {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, + {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, + {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, + {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, + {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, + }, { // TX_8X8 + {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, + {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, + {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, + {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, + {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, + {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, + {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, + {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, + {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, + }, { // TX_16X16 + {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors}, + {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors}, + {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors}, + {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors}, + {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors}, + {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors}, + {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors}, + {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors}, + {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors}, + }, { // TX_32X32 + {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors}, + {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors}, + {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors}, + {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors}, + {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors}, + {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors}, + {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors}, + {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors}, + {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors}, + } +}; +#endif // CONFIG_EXT_TX diff --git a/vp10/common/scan.h b/vp10/common/scan.h index f5a020f1e..09dab1a69 100644 --- a/vp10/common/scan.h +++ b/vp10/common/scan.h @@ -30,7 +30,7 @@ typedef struct { } scan_order; extern const scan_order vp10_default_scan_orders[TX_SIZES]; -extern const scan_order vp10_scan_orders[TX_SIZES][TX_TYPES]; +extern const scan_order vp10_intra_scan_orders[TX_SIZES][TX_TYPES]; static INLINE int get_coef_context(const int16_t *neighbors, const uint8_t *token_cache, int c) { @@ -38,8 +38,31 @@ static INLINE int get_coef_context(const int16_t *neighbors, token_cache[neighbors[MAX_NEIGHBORS * c + 1]]) >> 1; } -static INLINE const scan_order *get_scan(TX_SIZE tx_size, TX_TYPE tx_type) { - return &vp10_scan_orders[tx_size][tx_type]; +static INLINE const scan_order *get_intra_scan(TX_SIZE tx_size, + TX_TYPE tx_type) { + return &vp10_intra_scan_orders[tx_size][tx_type]; +} + +#if CONFIG_EXT_TX +extern const scan_order vp10_inter_scan_orders[TX_SIZES][TOTAL_TX_TYPES]; + +static INLINE const scan_order *get_inter_scan(TX_SIZE tx_size, + TX_TYPE tx_type) { + return &vp10_inter_scan_orders[tx_size][tx_type]; +} +#endif // CONFIG_EXT_TX + +static INLINE const scan_order *get_scan(TX_SIZE tx_size, + TX_TYPE tx_type, + int is_inter) { +#if CONFIG_EXT_TX + return + is_inter ? &vp10_inter_scan_orders[tx_size][tx_type] : + &vp10_intra_scan_orders[tx_size][tx_type]; +#else + (void) is_inter; + return &vp10_intra_scan_orders[tx_size][tx_type]; +#endif // CONFIG_EXT_TX } #ifdef __cplusplus diff --git a/vp10/decoder/decodeframe.c b/vp10/decoder/decodeframe.c index e113cdbe2..818c93359 100644 --- a/vp10/decoder/decodeframe.c +++ b/vp10/decoder/decodeframe.c @@ -348,7 +348,7 @@ static void predict_and_reconstruct_intra_block(MACROBLOCKD *const xd, if (!mbmi->skip) { TX_TYPE tx_type = get_tx_type(plane_type, xd, block_idx, tx_size); - const scan_order *sc = get_scan(tx_size, tx_type); + const scan_order *sc = get_scan(tx_size, tx_type, 0); const int eob = vp10_decode_block_tokens(xd, plane, sc, col, row, tx_size, r, mbmi->segment_id); inverse_transform_block_intra(xd, plane, tx_type, tx_size, @@ -363,7 +363,7 @@ static int reconstruct_inter_block(MACROBLOCKD *const xd, vpx_reader *r, PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV; int block_idx = (row << 1) + col; TX_TYPE tx_type = get_tx_type(plane_type, xd, block_idx, tx_size); - const scan_order *sc = get_scan(tx_size, tx_type); + const scan_order *sc = get_scan(tx_size, tx_type, 1); const int eob = vp10_decode_block_tokens(xd, plane, sc, col, row, tx_size, r, mbmi->segment_id); @@ -1970,6 +1970,17 @@ static size_t read_uncompressed_header(VP10Decoder *pbi, return sz; } +#if CONFIG_EXT_TX +static void read_ext_tx_probs(FRAME_CONTEXT *fc, vpx_reader *r) { + int i, j; + if (vpx_read(r, GROUP_DIFF_UPDATE_PROB)) { + for (j = TX_4X4; j <= TX_16X16; ++j) + for (i = 0; i < EXT_TX_TYPES - 1; ++i) + vp10_diff_update_prob(r, &fc->ext_tx_prob[j][i]); + } +} +#endif // CONFIG_EXT_TX + static int read_compressed_header(VP10Decoder *pbi, const uint8_t *data, size_t partition_size) { VP10_COMMON *const cm = &pbi->common; @@ -2017,6 +2028,9 @@ static int read_compressed_header(VP10Decoder *pbi, const uint8_t *data, vp10_diff_update_prob(&r, &fc->partition_prob[j][i]); read_mv_probs(nmvc, cm->allow_high_precision_mv, &r); +#if CONFIG_EXT_TX + read_ext_tx_probs(fc, &r); +#endif } return vpx_reader_has_error(&r); @@ -2056,6 +2070,10 @@ static void debug_check_frame_counts(const VP10_COMMON *const cm) { assert(!memcmp(&cm->counts.tx, &zero_counts.tx, sizeof(cm->counts.tx))); assert(!memcmp(cm->counts.skip, zero_counts.skip, sizeof(cm->counts.skip))); assert(!memcmp(&cm->counts.mv, &zero_counts.mv, sizeof(cm->counts.mv))); +#if CONFIG_EXT_TX + assert(!memcmp(cm->counts.ext_tx, zero_counts.ext_tx, + sizeof(cm->counts.ext_tx))); +#endif // CONFIG_EXT_TX } #endif // NDEBUG diff --git a/vp10/decoder/decodemv.c b/vp10/decoder/decodemv.c index 5fbe2ca07..71a5e2817 100644 --- a/vp10/decoder/decodemv.c +++ b/vp10/decoder/decodemv.c @@ -597,6 +597,22 @@ static void read_inter_frame_mode_info(VP10Decoder *const pbi, mbmi->skip = read_skip(cm, xd, mbmi->segment_id, r); inter_block = read_is_inter_block(cm, xd, mbmi->segment_id, r); mbmi->tx_size = read_tx_size(cm, xd, !mbmi->skip || !inter_block, r); +#if CONFIG_EXT_TX + if (inter_block && + mbmi->tx_size <= TX_16X16 && + cm->base_qindex > 0 && + mbmi->sb_type >= BLOCK_8X8 && + !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP) && + !mbmi->skip) { + mbmi->ext_txfrm = vpx_read_tree(r, + vp10_ext_tx_tree, + cm->fc->ext_tx_prob[mbmi->tx_size]); + if (!cm->frame_parallel_decoding_mode) + ++cm->counts.ext_tx[mbmi->tx_size][mbmi->ext_txfrm]; + } else { + mbmi->ext_txfrm = NORM; + } +#endif // CONFIG_EXT_TX if (inter_block) read_inter_block_mode_info(pbi, xd, mi, mi_row, mi_col, r); diff --git a/vp10/encoder/bitstream.c b/vp10/encoder/bitstream.c index b872b4e7d..b9019bab4 100644 --- a/vp10/encoder/bitstream.c +++ b/vp10/encoder/bitstream.c @@ -44,6 +44,16 @@ static const struct vp10_token partition_encodings[PARTITION_TYPES] = static const struct vp10_token inter_mode_encodings[INTER_MODES] = {{2, 2}, {6, 3}, {0, 1}, {7, 3}}; +#if CONFIG_EXT_TX +static struct vp10_token ext_tx_encodings[EXT_TX_TYPES]; +#endif // CONFIG_EXT_TX + +void vp10_encode_token_init() { +#if CONFIG_EXT_TX + vp10_tokens_from_tree(ext_tx_encodings, vp10_ext_tx_tree); +#endif // CONFIG_EXT_TX +} + static void write_intra_mode(vpx_writer *w, PREDICTION_MODE mode, const vpx_prob *probs) { vp10_write_token(w, vp10_intra_mode_tree, probs, &intra_mode_encodings[mode]); @@ -76,6 +86,24 @@ static void prob_diff_update(const vpx_tree_index *tree, vp10_cond_prob_diff_update(w, &probs[i], branch_ct[i]); } +static int prob_diff_update_savings(const vpx_tree_index *tree, + vpx_prob probs[/*n - 1*/], + const unsigned int counts[/*n - 1*/], + int n) { + int i; + unsigned int branch_ct[32][2]; + int savings = 0; + + // Assuming max number of probabilities <= 32 + assert(n <= 32); + vp10_tree_probs_from_distribution(tree, branch_ct, counts); + for (i = 0; i < n - 1; ++i) { + savings += vp10_cond_prob_diff_update_savings(&probs[i], + branch_ct[i]); + } + return savings; +} + static void write_selected_tx_size(const VP10_COMMON *cm, const MACROBLOCKD *xd, vpx_writer *w) { TX_SIZE tx_size = xd->mi[0]->mbmi.tx_size; @@ -119,6 +147,29 @@ static void update_switchable_interp_probs(VP10_COMMON *cm, vpx_writer *w, counts->switchable_interp[j], SWITCHABLE_FILTERS, w); } +#if CONFIG_EXT_TX +static void update_ext_tx_probs(VP10_COMMON *cm, vpx_writer *w) { + const int savings_thresh = vp10_cost_one(GROUP_DIFF_UPDATE_PROB) - + vp10_cost_zero(GROUP_DIFF_UPDATE_PROB); + int i; + int savings = 0; + int do_update = 0; + for (i = TX_4X4; i <= TX_16X16; ++i) { + savings += prob_diff_update_savings( + vp10_ext_tx_tree, cm->fc->ext_tx_prob[i], + cm->counts.ext_tx[i], EXT_TX_TYPES); + } + do_update = savings > savings_thresh; + vpx_write(w, do_update, GROUP_DIFF_UPDATE_PROB); + if (do_update) { + for (i = TX_4X4; i <= TX_16X16; ++i) { + prob_diff_update(vp10_ext_tx_tree, cm->fc->ext_tx_prob[i], + cm->counts.ext_tx[i], EXT_TX_TYPES, w); + } + } +} +#endif // CONFIG_EXT_TX + static void pack_mb_tokens(vpx_writer *w, TOKENEXTRA **tp, const TOKENEXTRA *const stop, vpx_bit_depth_t bit_depth) { @@ -273,6 +324,18 @@ static void pack_inter_mode_mvs(VP10_COMP *cpi, const MODE_INFO *mi, write_selected_tx_size(cm, xd, w); } +#if CONFIG_EXT_TX + if (is_inter && + mbmi->tx_size <= TX_16X16 && + cm->base_qindex > 0 && + bsize >= BLOCK_8X8 && + !mbmi->skip && + !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) { + vp10_write_token(w, vp10_ext_tx_tree, cm->fc->ext_tx_prob[mbmi->tx_size], + &ext_tx_encodings[mbmi->ext_txfrm]); + } +#endif // CONFIG_EXT_TX + if (!is_inter) { if (bsize >= BLOCK_8X8) { write_intra_mode(w, mode, cm->fc->y_mode_prob[size_group_lookup[bsize]]); @@ -1219,6 +1282,9 @@ static size_t write_compressed_header(VP10_COMP *cpi, uint8_t *data) { vp10_write_nmv_probs(cm, cm->allow_high_precision_mv, &header_bc, &counts->mv); +#if CONFIG_EXT_TX + update_ext_tx_probs(cm, &header_bc); +#endif } vpx_stop_encode(&header_bc); diff --git a/vp10/encoder/bitstream.h b/vp10/encoder/bitstream.h index d4b413d0d..b8b2354d5 100644 --- a/vp10/encoder/bitstream.h +++ b/vp10/encoder/bitstream.h @@ -20,6 +20,8 @@ extern "C" { void vp10_pack_bitstream(VP10_COMP *cpi, uint8_t *dest, size_t *size); +void vp10_encode_token_init(); + static INLINE int vp10_preserve_existing_gf(VP10_COMP *cpi) { return !cpi->multi_arf_allowed && cpi->refresh_golden_frame && cpi->rc.is_src_frame_alt_ref && diff --git a/vp10/encoder/encodeframe.c b/vp10/encoder/encodeframe.c index 027772304..e41a7b646 100644 --- a/vp10/encoder/encodeframe.c +++ b/vp10/encoder/encodeframe.c @@ -4208,5 +4208,15 @@ static void encode_superblock(VP10_COMP *cpi, ThreadData *td, } ++td->counts->tx.tx_totals[mbmi->tx_size]; ++td->counts->tx.tx_totals[get_uv_tx_size(mbmi, &xd->plane[1])]; +#if CONFIG_EXT_TX + if (is_inter_block(mbmi) && + mbmi->tx_size <= TX_16X16 && + cm->base_qindex > 0 && + bsize >= BLOCK_8X8 && + !mbmi->skip && + !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) { + ++td->counts->ext_tx[mbmi->tx_size][mbmi->ext_txfrm]; + } +#endif // CONFIG_EXT_TX } } diff --git a/vp10/encoder/encodemb.c b/vp10/encoder/encodemb.c index 51b47c9fe..561835f6a 100644 --- a/vp10/encoder/encodemb.c +++ b/vp10/encoder/encodemb.c @@ -105,7 +105,8 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block, const int16_t *dequant_ptr = pd->dequant; const uint8_t *const band_translate = get_band_translate(tx_size); TX_TYPE tx_type = get_tx_type(type, xd, block, tx_size); - const scan_order *const so = get_scan(tx_size, tx_type); + const scan_order *const so = + get_scan(tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi)); const int16_t *const scan = so->scan; const int16_t *const nb = so->neighbors; int next = eob, sz = 0; @@ -301,6 +302,7 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block, final_eob++; mb->plane[plane].eobs[block] = final_eob; + assert(final_eob <= default_eob); return final_eob; } @@ -323,183 +325,75 @@ static INLINE void highbd_fdct32x32(int rd_transform, const int16_t *src, } #endif // CONFIG_VP9_HIGHBITDEPTH -void vp10_xform_quant_fp(MACROBLOCK *x, int plane, int block, - BLOCK_SIZE plane_bsize, TX_SIZE tx_size) { - MACROBLOCKD *const xd = &x->e_mbd; - const struct macroblock_plane *const p = &x->plane[plane]; - const struct macroblockd_plane *const pd = &xd->plane[plane]; - PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV; - TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size); - const scan_order *const scan_order = get_scan(tx_size, tx_type); - tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block); - tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block); - tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); - uint16_t *const eob = &p->eobs[block]; - const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; - int i, j; - const int16_t *src_diff; - txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j); - src_diff = &p->src_diff[4 * (j * diff_stride + i)]; +#if CONFIG_EXT_TX +static void copy_block(const int16_t *src, int src_stride, int l, + int16_t *dest, int dest_stride) { + int i; + for (i = 0; i < l; ++i) { + memcpy(dest + dest_stride * i, src + src_stride * i, + l * sizeof(int16_t)); + } +} -#if CONFIG_VP9_HIGHBITDEPTH - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - switch (tx_size) { - case TX_32X32: - highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride); - vp10_highbd_quantize_fp_32x32(coeff, 1024, x->skip_block, p->zbin, - p->round_fp, p->quant_fp, p->quant_shift, - qcoeff, dqcoeff, pd->dequant, - eob, scan_order->scan, - scan_order->iscan); - break; - case TX_16X16: - vpx_highbd_fdct16x16(src_diff, coeff, diff_stride); - vp10_highbd_quantize_fp(coeff, 256, x->skip_block, p->zbin, p->round_fp, - p->quant_fp, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, eob, - scan_order->scan, scan_order->iscan); - break; - case TX_8X8: - vpx_highbd_fdct8x8(src_diff, coeff, diff_stride); - vp10_highbd_quantize_fp(coeff, 64, x->skip_block, p->zbin, p->round_fp, - p->quant_fp, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, eob, - scan_order->scan, scan_order->iscan); - break; - case TX_4X4: - x->fwd_txm4x4(src_diff, coeff, diff_stride); - vp10_highbd_quantize_fp(coeff, 16, x->skip_block, p->zbin, p->round_fp, - p->quant_fp, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, eob, - scan_order->scan, scan_order->iscan); - break; - default: - assert(0); +static void fliplr(int16_t *dest, int stride, int l) { + int i, j; + for (i = 0; i < l; ++i) { + for (j = 0; j < l / 2; ++j) { + const int16_t tmp = dest[i * stride + j]; + dest[i * stride + j] = dest[i * stride + l - 1 - j]; + dest[i * stride + l - 1 - j] = tmp; } - return; } -#endif // CONFIG_VP9_HIGHBITDEPTH +} - switch (tx_size) { - case TX_32X32: - fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride); - vp10_quantize_fp_32x32(coeff, 1024, x->skip_block, p->zbin, p->round_fp, - p->quant_fp, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, eob, scan_order->scan, - scan_order->iscan); - break; - case TX_16X16: - vpx_fdct16x16(src_diff, coeff, diff_stride); - vp10_quantize_fp(coeff, 256, x->skip_block, p->zbin, p->round_fp, - p->quant_fp, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, eob, - scan_order->scan, scan_order->iscan); - break; - case TX_8X8: - vp10_fdct8x8_quant(src_diff, diff_stride, coeff, 64, - x->skip_block, p->zbin, p->round_fp, - p->quant_fp, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, eob, - scan_order->scan, scan_order->iscan); - break; - case TX_4X4: - x->fwd_txm4x4(src_diff, coeff, diff_stride); - vp10_quantize_fp(coeff, 16, x->skip_block, p->zbin, p->round_fp, - p->quant_fp, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, eob, - scan_order->scan, scan_order->iscan); - break; - default: - assert(0); - break; +static void flipud(int16_t *dest, int stride, int l) { + int i, j; + for (j = 0; j < l; ++j) { + for (i = 0; i < l / 2; ++i) { + const int16_t tmp = dest[i * stride + j]; + dest[i * stride + j] = dest[(l - 1 - i) * stride + j]; + dest[(l - 1 - i) * stride + j] = tmp; + } } } -void vp10_xform_quant_dc(MACROBLOCK *x, int plane, int block, - BLOCK_SIZE plane_bsize, TX_SIZE tx_size) { - MACROBLOCKD *const xd = &x->e_mbd; - const struct macroblock_plane *const p = &x->plane[plane]; - const struct macroblockd_plane *const pd = &xd->plane[plane]; - tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block); - tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block); - tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); - uint16_t *const eob = &p->eobs[block]; - const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; +static void fliplrud(int16_t *dest, int stride, int l) { int i, j; - const int16_t *src_diff; - - txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j); - src_diff = &p->src_diff[4 * (j * diff_stride + i)]; - -#if CONFIG_VP9_HIGHBITDEPTH - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - switch (tx_size) { - case TX_32X32: - vpx_highbd_fdct32x32_1(src_diff, coeff, diff_stride); - vpx_highbd_quantize_dc_32x32(coeff, x->skip_block, p->round, - p->quant_fp[0], qcoeff, dqcoeff, - pd->dequant[0], eob); - break; - case TX_16X16: - vpx_highbd_fdct16x16_1(src_diff, coeff, diff_stride); - vpx_highbd_quantize_dc(coeff, 256, x->skip_block, p->round, - p->quant_fp[0], qcoeff, dqcoeff, - pd->dequant[0], eob); - break; - case TX_8X8: - vpx_highbd_fdct8x8_1(src_diff, coeff, diff_stride); - vpx_highbd_quantize_dc(coeff, 64, x->skip_block, p->round, - p->quant_fp[0], qcoeff, dqcoeff, - pd->dequant[0], eob); - break; - case TX_4X4: - x->fwd_txm4x4(src_diff, coeff, diff_stride); - vpx_highbd_quantize_dc(coeff, 16, x->skip_block, p->round, - p->quant_fp[0], qcoeff, dqcoeff, - pd->dequant[0], eob); - break; - default: - assert(0); + for (i = 0; i < l / 2; ++i) { + for (j = 0; j < l; ++j) { + const int16_t tmp = dest[i * stride + j]; + dest[i * stride + j] = dest[(l - 1 - i) * stride + l - 1 - j]; + dest[(l - 1 - i) * stride + l - 1 - j] = tmp; } - return; } -#endif // CONFIG_VP9_HIGHBITDEPTH +} - switch (tx_size) { - case TX_32X32: - vpx_fdct32x32_1(src_diff, coeff, diff_stride); - vpx_quantize_dc_32x32(coeff, x->skip_block, p->round, - p->quant_fp[0], qcoeff, dqcoeff, - pd->dequant[0], eob); - break; - case TX_16X16: - vpx_fdct16x16_1(src_diff, coeff, diff_stride); - vpx_quantize_dc(coeff, 256, x->skip_block, p->round, - p->quant_fp[0], qcoeff, dqcoeff, - pd->dequant[0], eob); - break; - case TX_8X8: - vpx_fdct8x8_1(src_diff, coeff, diff_stride); - vpx_quantize_dc(coeff, 64, x->skip_block, p->round, - p->quant_fp[0], qcoeff, dqcoeff, - pd->dequant[0], eob); - break; - case TX_4X4: - x->fwd_txm4x4(src_diff, coeff, diff_stride); - vpx_quantize_dc(coeff, 16, x->skip_block, p->round, - p->quant_fp[0], qcoeff, dqcoeff, - pd->dequant[0], eob); - break; - default: - assert(0); - break; - } +static void copy_fliplr(const int16_t *src, int src_stride, int l, + int16_t *dest, int dest_stride) { + copy_block(src, src_stride, l, dest, dest_stride); + fliplr(dest, dest_stride, l); +} + +static void copy_flipud(const int16_t *src, int src_stride, int l, + int16_t *dest, int dest_stride) { + copy_block(src, src_stride, l, dest, dest_stride); + flipud(dest, dest_stride, l); } +static void copy_fliplrud(const int16_t *src, int src_stride, int l, + int16_t *dest, int dest_stride) { + copy_block(src, src_stride, l, dest, dest_stride); + fliplrud(dest, dest_stride, l); +} +#endif // CONFIG_EXT_TX + void vp10_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff, int diff_stride, TX_TYPE tx_type, void (*fwd_txm4x4)(const int16_t *input, - tran_low_t *output, int stride)) { + tran_low_t *output, int stride)) { +#if CONFIG_EXT_TX + int16_t src_diff2[16]; +#endif // CONFIG_EXT_TX switch (tx_type) { case DCT_DCT: fwd_txm4x4(src_diff, coeff, diff_stride); @@ -509,6 +403,28 @@ void vp10_fwd_txfm_4x4(const int16_t *src_diff, case ADST_ADST: vp10_fht4x4(src_diff, coeff, diff_stride, tx_type); break; +#if CONFIG_EXT_TX + case FLIPADST_DCT: + copy_flipud(src_diff, diff_stride, 4, src_diff2, 4); + vp10_fht4x4(src_diff2, coeff, 4, ADST_DCT); + break; + case DCT_FLIPADST: + copy_fliplr(src_diff, diff_stride, 4, src_diff2, 4); + vp10_fht4x4(src_diff2, coeff, 4, DCT_ADST); + break; + case FLIPADST_FLIPADST: + copy_fliplrud(src_diff, diff_stride, 4, src_diff2, 4); + vp10_fht4x4(src_diff2, coeff, 4, ADST_ADST); + break; + case ADST_FLIPADST: + copy_fliplr(src_diff, diff_stride, 4, src_diff2, 4); + vp10_fht4x4(src_diff2, coeff, 4, ADST_ADST); + break; + case FLIPADST_ADST: + copy_flipud(src_diff, diff_stride, 4, src_diff2, 4); + vp10_fht4x4(src_diff2, coeff, 4, ADST_ADST); + break; +#endif // CONFIG_EXT_TX default: assert(0); break; @@ -517,6 +433,9 @@ void vp10_fwd_txfm_4x4(const int16_t *src_diff, static void fwd_txfm_8x8(const int16_t *src_diff, tran_low_t *coeff, int diff_stride, TX_TYPE tx_type) { +#if CONFIG_EXT_TX + int16_t src_diff2[64]; +#endif // CONFIG_EXT_TX switch (tx_type) { case DCT_DCT: case ADST_DCT: @@ -524,6 +443,68 @@ static void fwd_txfm_8x8(const int16_t *src_diff, tran_low_t *coeff, case ADST_ADST: vp10_fht8x8(src_diff, coeff, diff_stride, tx_type); break; +#if CONFIG_EXT_TX + case FLIPADST_DCT: + copy_flipud(src_diff, diff_stride, 8, src_diff2, 8); + vp10_fht8x8(src_diff2, coeff, 8, ADST_DCT); + break; + case DCT_FLIPADST: + copy_fliplr(src_diff, diff_stride, 8, src_diff2, 8); + vp10_fht8x8(src_diff2, coeff, 8, DCT_ADST); + break; + case FLIPADST_FLIPADST: + copy_fliplrud(src_diff, diff_stride, 8, src_diff2, 8); + vp10_fht8x8(src_diff2, coeff, 8, ADST_ADST); + break; + case ADST_FLIPADST: + copy_fliplr(src_diff, diff_stride, 8, src_diff2, 8); + vp10_fht8x8(src_diff2, coeff, 8, ADST_ADST); + break; + case FLIPADST_ADST: + copy_flipud(src_diff, diff_stride, 8, src_diff2, 8); + vp10_fht8x8(src_diff2, coeff, 8, ADST_ADST); + break; +#endif // CONFIG_EXT_TX + default: + assert(0); + break; + } +} + +static void fwd_txfm_8x8_1(const int16_t *src_diff, tran_low_t *coeff, + int diff_stride, TX_TYPE tx_type) { +#if CONFIG_EXT_TX + int16_t src_diff2[64]; +#endif // CONFIG_EXT_TX + switch (tx_type) { + case DCT_DCT: + case ADST_DCT: + case DCT_ADST: + case ADST_ADST: + vpx_fdct8x8_1(src_diff, coeff, diff_stride); + break; +#if CONFIG_EXT_TX + case FLIPADST_DCT: + copy_flipud(src_diff, diff_stride, 8, src_diff2, 8); + vp10_fht8x8(src_diff2, coeff, 8, ADST_DCT); + break; + case DCT_FLIPADST: + copy_fliplr(src_diff, diff_stride, 8, src_diff2, 8); + vp10_fht8x8(src_diff2, coeff, 8, DCT_ADST); + break; + case FLIPADST_FLIPADST: + copy_fliplrud(src_diff, diff_stride, 8, src_diff2, 8); + vp10_fht8x8(src_diff2, coeff, 8, ADST_ADST); + break; + case ADST_FLIPADST: + copy_fliplr(src_diff, diff_stride, 8, src_diff2, 8); + vp10_fht8x8(src_diff2, coeff, 8, ADST_ADST); + break; + case FLIPADST_ADST: + copy_flipud(src_diff, diff_stride, 8, src_diff2, 8); + vp10_fht8x8(src_diff2, coeff, 8, ADST_ADST); + break; +#endif // CONFIG_EXT_TX default: assert(0); break; @@ -532,6 +513,9 @@ static void fwd_txfm_8x8(const int16_t *src_diff, tran_low_t *coeff, static void fwd_txfm_16x16(const int16_t *src_diff, tran_low_t *coeff, int diff_stride, TX_TYPE tx_type) { +#if CONFIG_EXT_TX + int16_t src_diff2[256]; +#endif // CONFIG_EXT_TX switch (tx_type) { case DCT_DCT: case ADST_DCT: @@ -539,6 +523,68 @@ static void fwd_txfm_16x16(const int16_t *src_diff, tran_low_t *coeff, case ADST_ADST: vp10_fht16x16(src_diff, coeff, diff_stride, tx_type); break; +#if CONFIG_EXT_TX + case FLIPADST_DCT: + copy_flipud(src_diff, diff_stride, 16, src_diff2, 16); + vp10_fht16x16(src_diff2, coeff, 16, ADST_DCT); + break; + case DCT_FLIPADST: + copy_fliplr(src_diff, diff_stride, 16, src_diff2, 16); + vp10_fht16x16(src_diff2, coeff, 16, DCT_ADST); + break; + case FLIPADST_FLIPADST: + copy_fliplrud(src_diff, diff_stride, 16, src_diff2, 16); + vp10_fht16x16(src_diff2, coeff, 16, ADST_ADST); + break; + case ADST_FLIPADST: + copy_fliplr(src_diff, diff_stride, 16, src_diff2, 16); + vp10_fht16x16(src_diff2, coeff, 16, ADST_ADST); + break; + case FLIPADST_ADST: + copy_flipud(src_diff, diff_stride, 16, src_diff2, 16); + vp10_fht16x16(src_diff2, coeff, 16, ADST_ADST); + break; +#endif // CONFIG_EXT_TX + default: + assert(0); + break; + } +} + +static void fwd_txfm_16x16_1(const int16_t *src_diff, tran_low_t *coeff, + int diff_stride, TX_TYPE tx_type) { +#if CONFIG_EXT_TX + int16_t src_diff2[256]; +#endif // CONFIG_EXT_TX + switch (tx_type) { + case DCT_DCT: + case ADST_DCT: + case DCT_ADST: + case ADST_ADST: + vpx_fdct16x16_1(src_diff, coeff, diff_stride); + break; +#if CONFIG_EXT_TX + case FLIPADST_DCT: + copy_flipud(src_diff, diff_stride, 16, src_diff2, 16); + vp10_fht16x16(src_diff2, coeff, 16, ADST_DCT); + break; + case DCT_FLIPADST: + copy_fliplr(src_diff, diff_stride, 16, src_diff2, 16); + vp10_fht16x16(src_diff2, coeff, 16, DCT_ADST); + break; + case FLIPADST_FLIPADST: + copy_fliplrud(src_diff, diff_stride, 16, src_diff2, 16); + vp10_fht16x16(src_diff2, coeff, 16, ADST_ADST); + break; + case ADST_FLIPADST: + copy_fliplr(src_diff, diff_stride, 16, src_diff2, 16); + vp10_fht16x16(src_diff2, coeff, 16, ADST_ADST); + break; + case FLIPADST_ADST: + copy_flipud(src_diff, diff_stride, 16, src_diff2, 16); + vp10_fht16x16(src_diff2, coeff, 16, ADST_ADST); + break; +#endif // CONFIG_EXT_TX default: assert(0); break; @@ -563,11 +609,32 @@ static void fwd_txfm_32x32(int rd_transform, const int16_t *src_diff, } } +static void fwd_txfm_32x32_1(const int16_t *src_diff, + tran_low_t *coeff, int diff_stride, + TX_TYPE tx_type) { + switch (tx_type) { + case DCT_DCT: + vpx_fdct32x32_1(src_diff, coeff, diff_stride); + break; + case ADST_DCT: + case DCT_ADST: + case ADST_ADST: + assert(0); + break; + default: + assert(0); + break; + } +} + #if CONFIG_VP9_HIGHBITDEPTH void vp10_highbd_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff, int diff_stride, TX_TYPE tx_type, void (*highbd_fwd_txm4x4)(const int16_t *input, tran_low_t *output, int stride)) { +#if CONFIG_EXT_TX + int16_t src_diff2[16]; +#endif // CONFIG_EXT_TX switch (tx_type) { case DCT_DCT: highbd_fwd_txm4x4(src_diff, coeff, diff_stride); @@ -577,6 +644,28 @@ void vp10_highbd_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff, case ADST_ADST: vp10_highbd_fht4x4(src_diff, coeff, diff_stride, tx_type); break; +#if CONFIG_EXT_TX + case FLIPADST_DCT: + copy_flipud(src_diff, diff_stride, 4, src_diff2, 4); + vp10_highbd_fht4x4(src_diff2, coeff, 4, ADST_DCT); + break; + case DCT_FLIPADST: + copy_fliplr(src_diff, diff_stride, 4, src_diff2, 4); + vp10_highbd_fht4x4(src_diff2, coeff, 4, DCT_ADST); + break; + case FLIPADST_FLIPADST: + copy_fliplrud(src_diff, diff_stride, 4, src_diff2, 4); + vp10_highbd_fht4x4(src_diff2, coeff, 4, ADST_ADST); + break; + case ADST_FLIPADST: + copy_fliplr(src_diff, diff_stride, 4, src_diff2, 4); + vp10_highbd_fht4x4(src_diff2, coeff, 4, ADST_ADST); + break; + case FLIPADST_ADST: + copy_flipud(src_diff, diff_stride, 4, src_diff2, 4); + vp10_highbd_fht4x4(src_diff2, coeff, 4, ADST_ADST); + break; +#endif // CONFIG_EXT_TX default: assert(0); break; @@ -584,7 +673,10 @@ void vp10_highbd_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff, } static void highbd_fwd_txfm_8x8(const int16_t *src_diff, tran_low_t *coeff, - int diff_stride, TX_TYPE tx_type) { + int diff_stride, TX_TYPE tx_type) { +#if CONFIG_EXT_TX + int16_t src_diff2[64]; +#endif // CONFIG_EXT_TX switch (tx_type) { case DCT_DCT: vpx_highbd_fdct8x8(src_diff, coeff, diff_stride); @@ -594,6 +686,70 @@ static void highbd_fwd_txfm_8x8(const int16_t *src_diff, tran_low_t *coeff, case ADST_ADST: vp10_highbd_fht8x8(src_diff, coeff, diff_stride, tx_type); break; +#if CONFIG_EXT_TX + case FLIPADST_DCT: + copy_flipud(src_diff, diff_stride, 8, src_diff2, 8); + vp10_highbd_fht8x8(src_diff2, coeff, 8, ADST_DCT); + break; + case DCT_FLIPADST: + copy_fliplr(src_diff, diff_stride, 8, src_diff2, 8); + vp10_highbd_fht8x8(src_diff2, coeff, 8, DCT_ADST); + break; + case FLIPADST_FLIPADST: + copy_fliplrud(src_diff, diff_stride, 8, src_diff2, 8); + vp10_highbd_fht8x8(src_diff2, coeff, 8, ADST_ADST); + break; + case ADST_FLIPADST: + copy_fliplr(src_diff, diff_stride, 8, src_diff2, 8); + vp10_highbd_fht8x8(src_diff2, coeff, 8, ADST_ADST); + break; + case FLIPADST_ADST: + copy_flipud(src_diff, diff_stride, 8, src_diff2, 8); + vp10_highbd_fht8x8(src_diff2, coeff, 8, ADST_ADST); + break; +#endif // CONFIG_EXT_TX + default: + assert(0); + break; + } +} + +static void highbd_fwd_txfm_8x8_1(const int16_t *src_diff, tran_low_t *coeff, + int diff_stride, TX_TYPE tx_type) { +#if CONFIG_EXT_TX + int16_t src_diff2[64]; +#endif // CONFIG_EXT_TX + switch (tx_type) { + case DCT_DCT: + vpx_highbd_fdct8x8_1(src_diff, coeff, diff_stride); + break; + case ADST_DCT: + case DCT_ADST: + case ADST_ADST: + vp10_highbd_fht8x8(src_diff, coeff, diff_stride, tx_type); + break; +#if CONFIG_EXT_TX + case FLIPADST_DCT: + copy_flipud(src_diff, diff_stride, 8, src_diff2, 8); + vp10_highbd_fht8x8(src_diff2, coeff, 8, ADST_DCT); + break; + case DCT_FLIPADST: + copy_fliplr(src_diff, diff_stride, 8, src_diff2, 8); + vp10_highbd_fht8x8(src_diff2, coeff, 8, DCT_ADST); + break; + case FLIPADST_FLIPADST: + copy_fliplrud(src_diff, diff_stride, 8, src_diff2, 8); + vp10_highbd_fht8x8(src_diff2, coeff, 8, ADST_ADST); + break; + case ADST_FLIPADST: + copy_fliplr(src_diff, diff_stride, 8, src_diff2, 8); + vp10_highbd_fht8x8(src_diff2, coeff, 8, ADST_ADST); + break; + case FLIPADST_ADST: + copy_flipud(src_diff, diff_stride, 8, src_diff2, 8); + vp10_highbd_fht8x8(src_diff2, coeff, 8, ADST_ADST); + break; +#endif // CONFIG_EXT_TX default: assert(0); break; @@ -601,7 +757,10 @@ static void highbd_fwd_txfm_8x8(const int16_t *src_diff, tran_low_t *coeff, } static void highbd_fwd_txfm_16x16(const int16_t *src_diff, tran_low_t *coeff, - int diff_stride, TX_TYPE tx_type) { + int diff_stride, TX_TYPE tx_type) { +#if CONFIG_EXT_TX + int16_t src_diff2[256]; +#endif // CONFIG_EXT_TX switch (tx_type) { case DCT_DCT: vpx_highbd_fdct16x16(src_diff, coeff, diff_stride); @@ -611,6 +770,70 @@ static void highbd_fwd_txfm_16x16(const int16_t *src_diff, tran_low_t *coeff, case ADST_ADST: vp10_highbd_fht16x16(src_diff, coeff, diff_stride, tx_type); break; +#if CONFIG_EXT_TX + case FLIPADST_DCT: + copy_flipud(src_diff, diff_stride, 16, src_diff2, 16); + vp10_highbd_fht16x16(src_diff2, coeff, 16, ADST_DCT); + break; + case DCT_FLIPADST: + copy_fliplr(src_diff, diff_stride, 16, src_diff2, 16); + vp10_highbd_fht16x16(src_diff2, coeff, 16, DCT_ADST); + break; + case FLIPADST_FLIPADST: + copy_fliplrud(src_diff, diff_stride, 16, src_diff2, 16); + vp10_highbd_fht16x16(src_diff2, coeff, 16, ADST_ADST); + break; + case ADST_FLIPADST: + copy_fliplr(src_diff, diff_stride, 16, src_diff2, 16); + vp10_highbd_fht16x16(src_diff2, coeff, 16, ADST_ADST); + break; + case FLIPADST_ADST: + copy_flipud(src_diff, diff_stride, 16, src_diff2, 16); + vp10_highbd_fht16x16(src_diff2, coeff, 16, ADST_ADST); + break; +#endif // CONFIG_EXT_TX + default: + assert(0); + break; + } +} + +static void highbd_fwd_txfm_16x16_1(const int16_t *src_diff, tran_low_t *coeff, + int diff_stride, TX_TYPE tx_type) { +#if CONFIG_EXT_TX + int16_t src_diff2[256]; +#endif // CONFIG_EXT_TX + switch (tx_type) { + case DCT_DCT: + vpx_highbd_fdct16x16_1(src_diff, coeff, diff_stride); + break; + case ADST_DCT: + case DCT_ADST: + case ADST_ADST: + vp10_highbd_fht16x16(src_diff, coeff, diff_stride, tx_type); + break; +#if CONFIG_EXT_TX + case FLIPADST_DCT: + copy_flipud(src_diff, diff_stride, 16, src_diff2, 16); + vp10_highbd_fht16x16(src_diff2, coeff, 16, ADST_DCT); + break; + case DCT_FLIPADST: + copy_fliplr(src_diff, diff_stride, 16, src_diff2, 16); + vp10_highbd_fht16x16(src_diff2, coeff, 16, DCT_ADST); + break; + case FLIPADST_FLIPADST: + copy_fliplrud(src_diff, diff_stride, 16, src_diff2, 16); + vp10_highbd_fht16x16(src_diff2, coeff, 16, ADST_ADST); + break; + case ADST_FLIPADST: + copy_fliplr(src_diff, diff_stride, 16, src_diff2, 16); + vp10_highbd_fht16x16(src_diff2, coeff, 16, ADST_ADST); + break; + case FLIPADST_ADST: + copy_flipud(src_diff, diff_stride, 16, src_diff2, 16); + vp10_highbd_fht16x16(src_diff2, coeff, 16, ADST_ADST); + break; +#endif // CONFIG_EXT_TX default: assert(0); break; @@ -634,8 +857,207 @@ static void highbd_fwd_txfm_32x32(int rd_transform, const int16_t *src_diff, break; } } + +static void highbd_fwd_txfm_32x32_1(const int16_t *src_diff, + tran_low_t *coeff, int diff_stride, + TX_TYPE tx_type) { + switch (tx_type) { + case DCT_DCT: + vpx_highbd_fdct32x32_1(src_diff, coeff, diff_stride); + break; + case ADST_DCT: + case DCT_ADST: + case ADST_ADST: + assert(0); + break; + default: + assert(0); + break; + } +} #endif // CONFIG_VP9_HIGHBITDEPTH +void vp10_xform_quant_fp(MACROBLOCK *x, int plane, int block, + BLOCK_SIZE plane_bsize, TX_SIZE tx_size) { + MACROBLOCKD *const xd = &x->e_mbd; + const struct macroblock_plane *const p = &x->plane[plane]; + const struct macroblockd_plane *const pd = &xd->plane[plane]; + PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV; + TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size); + const scan_order *const scan_order = + get_scan(tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi)); + tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block); + tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block); + tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); + uint16_t *const eob = &p->eobs[block]; + const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; + int i, j; + const int16_t *src_diff; + txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j); + src_diff = &p->src_diff[4 * (j * diff_stride + i)]; + +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + switch (tx_size) { + case TX_32X32: + highbd_fwd_txfm_32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride, + tx_type); + vp10_highbd_quantize_fp_32x32(coeff, 1024, x->skip_block, p->zbin, + p->round_fp, p->quant_fp, p->quant_shift, + qcoeff, dqcoeff, pd->dequant, + eob, scan_order->scan, + scan_order->iscan); + break; + case TX_16X16: + highbd_fwd_txfm_16x16(src_diff, coeff, diff_stride, tx_type); + vp10_highbd_quantize_fp(coeff, 256, x->skip_block, p->zbin, p->round_fp, + p->quant_fp, p->quant_shift, qcoeff, dqcoeff, + pd->dequant, eob, + scan_order->scan, scan_order->iscan); + break; + case TX_8X8: + highbd_fwd_txfm_8x8(src_diff, coeff, diff_stride, tx_type); + vp10_highbd_quantize_fp(coeff, 64, x->skip_block, p->zbin, p->round_fp, + p->quant_fp, p->quant_shift, qcoeff, dqcoeff, + pd->dequant, eob, + scan_order->scan, scan_order->iscan); + break; + case TX_4X4: + vp10_highbd_fwd_txfm_4x4(src_diff, coeff, diff_stride, tx_type, + x->fwd_txm4x4); + vp10_highbd_quantize_fp(coeff, 16, x->skip_block, p->zbin, p->round_fp, + p->quant_fp, p->quant_shift, qcoeff, dqcoeff, + pd->dequant, eob, + scan_order->scan, scan_order->iscan); + break; + default: + assert(0); + } + return; + } +#endif // CONFIG_VP9_HIGHBITDEPTH + + switch (tx_size) { + case TX_32X32: + fwd_txfm_32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride, tx_type); + vp10_quantize_fp_32x32(coeff, 1024, x->skip_block, p->zbin, p->round_fp, + p->quant_fp, p->quant_shift, qcoeff, dqcoeff, + pd->dequant, eob, scan_order->scan, + scan_order->iscan); + break; + case TX_16X16: + fwd_txfm_16x16(src_diff, coeff, diff_stride, tx_type); + vp10_quantize_fp(coeff, 256, x->skip_block, p->zbin, p->round_fp, + p->quant_fp, p->quant_shift, qcoeff, dqcoeff, + pd->dequant, eob, + scan_order->scan, scan_order->iscan); + break; + case TX_8X8: + fwd_txfm_8x8(src_diff, coeff, diff_stride, tx_type); + vp10_quantize_fp(coeff, 64, x->skip_block, p->zbin, p->round_fp, + p->quant_fp, p->quant_shift, qcoeff, dqcoeff, + pd->dequant, eob, + scan_order->scan, scan_order->iscan); + break; + case TX_4X4: + vp10_fwd_txfm_4x4(src_diff, coeff, diff_stride, tx_type, + x->fwd_txm4x4); + vp10_quantize_fp(coeff, 16, x->skip_block, p->zbin, p->round_fp, + p->quant_fp, p->quant_shift, qcoeff, dqcoeff, + pd->dequant, eob, + scan_order->scan, scan_order->iscan); + break; + default: + assert(0); + break; + } +} + +void vp10_xform_quant_dc(MACROBLOCK *x, int plane, int block, + BLOCK_SIZE plane_bsize, TX_SIZE tx_size) { + MACROBLOCKD *const xd = &x->e_mbd; + const struct macroblock_plane *const p = &x->plane[plane]; + const struct macroblockd_plane *const pd = &xd->plane[plane]; + PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV; + TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size); + tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block); + tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block); + tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); + uint16_t *const eob = &p->eobs[block]; + const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; + int i, j; + const int16_t *src_diff; + + txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j); + src_diff = &p->src_diff[4 * (j * diff_stride + i)]; + +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + switch (tx_size) { + case TX_32X32: + highbd_fwd_txfm_32x32_1(src_diff, coeff, diff_stride, tx_type); + vpx_highbd_quantize_dc_32x32(coeff, x->skip_block, p->round, + p->quant_fp[0], qcoeff, dqcoeff, + pd->dequant[0], eob); + break; + case TX_16X16: + highbd_fwd_txfm_16x16_1(src_diff, coeff, diff_stride, tx_type); + vpx_highbd_quantize_dc(coeff, 256, x->skip_block, p->round, + p->quant_fp[0], qcoeff, dqcoeff, + pd->dequant[0], eob); + break; + case TX_8X8: + highbd_fwd_txfm_8x8_1(src_diff, coeff, diff_stride, tx_type); + vpx_highbd_quantize_dc(coeff, 64, x->skip_block, p->round, + p->quant_fp[0], qcoeff, dqcoeff, + pd->dequant[0], eob); + break; + case TX_4X4: + vp10_highbd_fwd_txfm_4x4(src_diff, coeff, diff_stride, tx_type, + x->fwd_txm4x4); + vpx_highbd_quantize_dc(coeff, 16, x->skip_block, p->round, + p->quant_fp[0], qcoeff, dqcoeff, + pd->dequant[0], eob); + break; + default: + assert(0); + } + return; + } +#endif // CONFIG_VP9_HIGHBITDEPTH + + switch (tx_size) { + case TX_32X32: + fwd_txfm_32x32_1(src_diff, coeff, diff_stride, tx_type); + vpx_quantize_dc_32x32(coeff, x->skip_block, p->round, + p->quant_fp[0], qcoeff, dqcoeff, + pd->dequant[0], eob); + break; + case TX_16X16: + fwd_txfm_16x16_1(src_diff, coeff, diff_stride, tx_type); + vpx_quantize_dc(coeff, 256, x->skip_block, p->round, + p->quant_fp[0], qcoeff, dqcoeff, + pd->dequant[0], eob); + break; + case TX_8X8: + fwd_txfm_8x8_1(src_diff, coeff, diff_stride, tx_type); + vpx_quantize_dc(coeff, 64, x->skip_block, p->round, + p->quant_fp[0], qcoeff, dqcoeff, + pd->dequant[0], eob); + break; + case TX_4X4: + vp10_fwd_txfm_4x4(src_diff, coeff, diff_stride, tx_type, + x->fwd_txm4x4); + vpx_quantize_dc(coeff, 16, x->skip_block, p->round, + p->quant_fp[0], qcoeff, dqcoeff, + pd->dequant[0], eob); + break; + default: + assert(0); + break; + } +} + void vp10_xform_quant(MACROBLOCK *x, int plane, int block, BLOCK_SIZE plane_bsize, TX_SIZE tx_size) { MACROBLOCKD *const xd = &x->e_mbd; @@ -643,7 +1065,8 @@ void vp10_xform_quant(MACROBLOCK *x, int plane, int block, const struct macroblockd_plane *const pd = &xd->plane[plane]; PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV; TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size); - const scan_order *const scan_order = get_scan(tx_size, tx_type); + const scan_order *const scan_order = + get_scan(tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi)); tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block); tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block); tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); @@ -659,7 +1082,7 @@ void vp10_xform_quant(MACROBLOCK *x, int plane, int block, switch (tx_size) { case TX_32X32: highbd_fwd_txfm_32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride, - tx_type); + tx_type); vpx_highbd_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob, @@ -885,7 +1308,7 @@ static void encode_block_pass1(int plane, int block, BLOCK_SIZE plane_bsize, void vp10_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize) { vp10_subtract_plane(x, bsize, 0); vp10_foreach_transformed_block_in_plane(&x->e_mbd, bsize, 0, - encode_block_pass1, x); + encode_block_pass1, x); } void vp10_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) { @@ -929,7 +1352,7 @@ void vp10_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV; TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size); - const scan_order *const scan_order = get_scan(tx_size, tx_type); + const scan_order *const scan_order = get_scan(tx_size, tx_type, 0); PREDICTION_MODE mode; const int bwl = b_width_log2_lookup[plane_bsize]; const int diff_stride = 4 * (1 << bwl); diff --git a/vp10/encoder/encoder.c b/vp10/encoder/encoder.c index f5cb0a6a7..d911436f5 100644 --- a/vp10/encoder/encoder.c +++ b/vp10/encoder/encoder.c @@ -330,6 +330,7 @@ void vp10_initialize_enc(void) { vp10_rc_init_minq_luts(); vp10_entropy_mv_init(); vp10_temporal_filter_init(); + vp10_encode_token_init(); init_done = 1; } } diff --git a/vp10/encoder/encoder.h b/vp10/encoder/encoder.h index 1b677763b..998a01393 100644 --- a/vp10/encoder/encoder.h +++ b/vp10/encoder/encoder.h @@ -471,6 +471,9 @@ typedef struct VP10_COMP { int y_mode_costs[INTRA_MODES][INTRA_MODES][INTRA_MODES]; int switchable_interp_costs[SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS]; int partition_cost[PARTITION_CONTEXTS][PARTITION_TYPES]; +#if CONFIG_EXT_TX + int ext_tx_costs[EXT_TX_SIZES][EXT_TX_TYPES]; +#endif // CONFIG_EXT_TX int multi_arf_allowed; int multi_arf_enabled; diff --git a/vp10/encoder/pickmode.c b/vp10/encoder/pickmode.c index 3a542b939..0c283d421 100644 --- a/vp10/encoder/pickmode.c +++ b/vp10/encoder/pickmode.c @@ -602,7 +602,8 @@ static void block_yrd(VP10_COMP *cpi, MACROBLOCK *x, int *rate, int64_t *dist, if (c < max_blocks_wide) { PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV; TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size); - const scan_order *const scan_order = get_scan(tx_size, tx_type); + const scan_order *const scan_order = + get_scan(tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi)); tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block); tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block); tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); diff --git a/vp10/encoder/rd.c b/vp10/encoder/rd.c index dad1d2a6e..816b963d3 100644 --- a/vp10/encoder/rd.c +++ b/vp10/encoder/rd.c @@ -83,6 +83,11 @@ static void fill_mode_costs(VP10_COMP *cpi) { for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) vp10_cost_tokens(cpi->switchable_interp_costs[i], fc->switchable_interp_prob[i], vp10_switchable_interp_tree); +#if CONFIG_EXT_TX + for (i = TX_4X4; i <= TX_16X16; ++i) + vp10_cost_tokens(cpi->ext_tx_costs[i], fc->ext_tx_prob[i], + vp10_ext_tx_tree); +#endif // CONFIG_EXT_TX } static void fill_token_costs(vp10_coeff_cost *c, diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c index ea4dce5b1..bbc2a294a 100644 --- a/vp10/encoder/rdopt.c +++ b/vp10/encoder/rdopt.c @@ -53,6 +53,10 @@ #define MIN_EARLY_TERM_INDEX 3 #define NEW_MV_DISCOUNT_FACTOR 8 +#if CONFIG_EXT_TX +const double ext_tx_th = 0.98; +#endif + typedef struct { PREDICTION_MODE mode; MV_REFERENCE_FRAME ref_frame[2]; @@ -571,10 +575,10 @@ static void txfm_rd_in_plane(MACROBLOCK *x, vp10_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left); tx_type = get_tx_type(pd->plane_type, xd, 0, tx_size); - args.so = get_scan(tx_size, tx_type); + args.so = get_scan(tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi)); vp10_foreach_transformed_block_in_plane(xd, bsize, plane, - block_rd_txfm, &args); + block_rd_txfm, &args); if (args.exit_early) { *rate = INT_MAX; *distortion = INT64_MAX; @@ -600,10 +604,26 @@ static void choose_largest_tx_size(VP10_COMP *cpi, MACROBLOCK *x, MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; mbmi->tx_size = MIN(max_tx_size, largest_tx_size); +#if CONFIG_EXT_TX + if (mbmi->ext_txfrm >= GET_EXT_TX_TYPES(mbmi->tx_size)) { + *rate = INT_MAX; + *distortion = INT64_MAX; + *sse = INT64_MAX; + *skip = 0; + return; + } +#endif // CONFIG_EXT_TX txfm_rd_in_plane(x, rate, distortion, skip, sse, ref_best_rd, 0, bs, mbmi->tx_size, cpi->sf.use_fast_coef_costing); + +#if CONFIG_EXT_TX + if (is_inter_block(mbmi) && bs >= BLOCK_8X8 && + mbmi->tx_size <= TX_16X16 && + !xd->lossless && *rate != INT_MAX) + *rate += cpi->ext_tx_costs[mbmi->tx_size][mbmi->ext_txfrm]; +#endif // CONFIG_EXT_TX } static void choose_tx_size_from_rd(VP10_COMP *cpi, MACROBLOCK *x, @@ -653,9 +673,23 @@ static void choose_tx_size_from_rd(VP10_COMP *cpi, MACROBLOCK *x, else r_tx_size += vp10_cost_one(tx_probs[m]); } +#if CONFIG_EXT_TX + if (mbmi->ext_txfrm >= GET_EXT_TX_TYPES(n)) { + r[n][0] = r[n][1] = INT_MAX; + d[n] = INT64_MAX; + } else { +#endif // CONFIG_EXT_TX txfm_rd_in_plane(x, &r[n][0], &d[n], &s[n], &sse[n], ref_best_rd, 0, bs, n, cpi->sf.use_fast_coef_costing); + +#if CONFIG_EXT_TX + } + if (is_inter_block(mbmi) && bs >= BLOCK_8X8 && + !xd->lossless && r[n][0] != INT_MAX) + r[n][0] += cpi->ext_tx_costs[n][mbmi->ext_txfrm]; +#endif // CONFIG_EXT_TX + r[n][1] = r[n][0]; if (r[n][0] < INT_MAX) { r[n][1] += r_tx_size; @@ -665,6 +699,7 @@ static void choose_tx_size_from_rd(VP10_COMP *cpi, MACROBLOCK *x, } else if (s[n]) { if (is_inter_block(mbmi)) { rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, sse[n]); + r[n][1] -= r_tx_size; } else { rd[n][0] = RDCOST(x->rdmult, x->rddiv, s1, sse[n]); rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1 + r_tx_size, sse[n]); @@ -686,6 +721,7 @@ static void choose_tx_size_from_rd(VP10_COMP *cpi, MACROBLOCK *x, best_rd = rd[n][1]; } } + mbmi->tx_size = best_tx; *distortion = d[mbmi->tx_size]; @@ -805,7 +841,7 @@ static int64_t rd_pick_intra4x4block(VP10_COMP *cpi, MACROBLOCK *x, dst, dst_stride, xd->bd); if (xd->lossless) { TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4); - const scan_order *so = get_scan(TX_4X4, tx_type); + const scan_order *so = get_scan(TX_4X4, tx_type, 0); vp10_highbd_fwd_txfm_4x4(src_diff, coeff, 8, DCT_DCT, vp10_highbd_fwht4x4); vp10_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan); @@ -821,7 +857,7 @@ static int64_t rd_pick_intra4x4block(VP10_COMP *cpi, MACROBLOCK *x, } else { int64_t unused; TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4); - const scan_order *so = get_scan(TX_4X4, tx_type); + const scan_order *so = get_scan(TX_4X4, tx_type, 0); vp10_highbd_fwd_txfm_4x4(src_diff, coeff, 8, tx_type, vpx_highbd_fdct4x4); vp10_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan); @@ -910,7 +946,7 @@ static int64_t rd_pick_intra4x4block(VP10_COMP *cpi, MACROBLOCK *x, if (xd->lossless) { TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4); - const scan_order *so = get_scan(TX_4X4, tx_type); + const scan_order *so = get_scan(TX_4X4, tx_type, 0); vp10_fwd_txfm_4x4(src_diff, coeff, 8, DCT_DCT, vp10_fwht4x4); vp10_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan); ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4, @@ -924,7 +960,7 @@ static int64_t rd_pick_intra4x4block(VP10_COMP *cpi, MACROBLOCK *x, } else { int64_t unused; TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4); - const scan_order *so = get_scan(TX_4X4, tx_type); + const scan_order *so = get_scan(TX_4X4, tx_type, 0); vp10_fwd_txfm_4x4(src_diff, coeff, 8, tx_type, vpx_fdct4x4); vp10_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan); ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4, @@ -1317,7 +1353,7 @@ static int64_t encode_inter_mb_segment(VP10_COMP *cpi, int64_t thisdistortion = 0, thissse = 0; int thisrate = 0, ref; TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, i, TX_4X4); - const scan_order *so = get_scan(TX_4X4, tx_type); + const scan_order *so = get_scan(TX_4X4, tx_type, 1); const int is_compound = has_second_ref(&mi->mbmi); const InterpKernel *kernel = vp10_filter_kernels[mi->mbmi.interp_filter]; @@ -2691,6 +2727,37 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x, // Y cost and distortion vp10_subtract_plane(x, bsize, 0); +#if CONFIG_EXT_TX + if (xd->lossless) { + mbmi->ext_txfrm = NORM; + } else { + int64_t rdcost_tx; + int rate_y_tx; + int64_t distortion_y_tx; + int dummy; + int64_t best_rdcost_tx = INT64_MAX; + int best_ext_tx = NORM; + + for (i = NORM; i < EXT_TX_TYPES; i++) { + mbmi->ext_txfrm = i; + super_block_yrd(cpi, x, &rate_y_tx, &distortion_y_tx, &dummy, psse, + bsize, INT64_MAX); + assert(rate_y_tx != INT_MAX); + assert(rate_y_tx >= 0); + rdcost_tx = RDCOST(x->rdmult, x->rddiv, rate_y_tx, distortion_y_tx); + rdcost_tx = MIN(rdcost_tx, RDCOST(x->rdmult, x->rddiv, 0, *psse)); + assert(rdcost_tx >= 0); + if (rdcost_tx < best_rdcost_tx * ext_tx_th) { + best_ext_tx = i; + best_rdcost_tx = rdcost_tx; + } + } + if (mbmi->tx_size > TX_16X16) + assert(best_ext_tx == NORM); + mbmi->ext_txfrm = best_ext_tx; + } +#endif // CONFIG_EXT_TX + super_block_yrd(cpi, x, rate_y, &distortion_y, &skippable_y, psse, bsize, ref_best_rd); @@ -3288,6 +3355,9 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi, if (ref_frame == INTRA_FRAME) { TX_SIZE uv_tx; struct macroblockd_plane *const pd = &xd->plane[1]; +#if CONFIG_EXT_TX + mbmi->ext_txfrm = NORM; +#endif // CONFIG_EXT_TX memset(x->skip_txfm, 0, sizeof(x->skip_txfm)); super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable, NULL, bsize, best_rd); @@ -3312,6 +3382,9 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi, rate2 += intra_cost_penalty; distortion2 = distortion_y + distortion_uv; } else { +#if CONFIG_EXT_TX + mbmi->ext_txfrm = NORM; +#endif this_rd = handle_inter_mode(cpi, x, bsize, &rate2, &distortion2, &skippable, &rate_y, &rate_uv, @@ -3744,6 +3817,9 @@ void vp10_rd_pick_inter_mode_sub8x8(VP10_COMP *cpi, int64_t filter_cache[SWITCHABLE_FILTER_CONTEXTS]; int internal_active_edge = vp10_active_edge_sb(cpi, mi_row, mi_col) && vp10_internal_image_edge(cpi); +#if CONFIG_EXT_TX + mbmi->ext_txfrm = NORM; +#endif x->skip_encode = sf->skip_encode_frame && x->q_index < QIDX_SKIP_THRESH; memset(x->zcoeff_blk[TX_4X4], 0, 4); @@ -3903,6 +3979,9 @@ void vp10_rd_pick_inter_mode_sub8x8(VP10_COMP *cpi, if (ref_frame == INTRA_FRAME) { int rate; +#if CONFIG_EXT_TX + mbmi->ext_txfrm = NORM; +#endif if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate, &rate_y, &distortion_y, best_rd) >= best_rd) continue; diff --git a/vp10/encoder/subexp.c b/vp10/encoder/subexp.c index fd0b09b69..361d73868 100644 --- a/vp10/encoder/subexp.c +++ b/vp10/encoder/subexp.c @@ -211,3 +211,12 @@ void vp10_cond_prob_diff_update(vpx_writer *w, vpx_prob *oldp, vpx_write(w, 0, upd); } } + +int vp10_cond_prob_diff_update_savings(vpx_prob *oldp, + const unsigned int ct[2]) { + const vpx_prob upd = DIFF_UPDATE_PROB; + vpx_prob newp = get_binary_prob(ct[0], ct[1]); + const int savings = vp10_prob_diff_update_savings_search(ct, *oldp, &newp, + upd); + return savings; +} diff --git a/vp10/encoder/subexp.h b/vp10/encoder/subexp.h index 04b96c0bd..bad23d654 100644 --- a/vp10/encoder/subexp.h +++ b/vp10/encoder/subexp.h @@ -36,6 +36,8 @@ int vp10_prob_diff_update_savings_search_model(const unsigned int *ct, vpx_prob *bestp, vpx_prob upd, int stepsize); +int vp10_cond_prob_diff_update_savings(vpx_prob *oldp, + const unsigned int ct[2]); #ifdef __cplusplus } // extern "C" diff --git a/vp10/encoder/tokenize.c b/vp10/encoder/tokenize.c index b2ee95e7f..b8d0281b3 100644 --- a/vp10/encoder/tokenize.c +++ b/vp10/encoder/tokenize.c @@ -508,7 +508,7 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE plane_bsize, const int segment_id = mbmi->segment_id; const int16_t *scan, *nb; const TX_TYPE tx_type = get_tx_type(type, xd, block, tx_size); - const scan_order *const so = get_scan(tx_size, tx_type); + const scan_order *const so = get_scan(tx_size, tx_type, is_inter_block(mbmi)); const int ref = is_inter_block(mbmi); unsigned int (*const counts)[COEFF_CONTEXTS][ENTROPY_TOKENS] = td->rd_counts.coef_counts[tx_size][type][ref];