#define USE_MSKTX_FOR_32X32 1
static const int num_ext_tx_set_inter[EXT_TX_SETS_INTER] = {
- 1, 17, 10, 2
+ 1, 19, 12, 2
};
static const int num_ext_tx_set_intra[EXT_TX_SETS_INTRA] = {
1, 17, 10
// Transform types used in each inter set
static const int ext_tx_used_inter[EXT_TX_SETS_INTER][TX_TYPES] = {
- { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
- { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, },
- { 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, },
- { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, },
+ { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },  // set 0: 1 type (index 0 only)
+ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},  // set 1: all 19 types
+ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1},  // set 2: 12 types (indices 0-8 plus IDTX, V_DCT, H_DCT)
+ { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0},  // set 3: 2 types (index 0 and IDTX); the count of 1s in each row must equal num_ext_tx_set_inter[row], which is 2 here, so V_DCT/H_DCT stay cleared
};
static INLINE int get_ext_tx_types(TX_SIZE tx_size, BLOCK_SIZE bs,
[TREE_SIZE(TX_TYPES)] = {
{ // ToDo(yaowu): remove unused entry 0.
-IDTX, 2,
- -DCT_DCT, 4,
- -DST_DST, 6,
- 8, 18,
- 10, 12,
- -DST_DCT, -DCT_DST,
+ -V_DCT, 4,
+ -H_DCT, 6,
+ -DCT_DCT, 8,
+ -DST_DST, 10,
+ 12, 22,
14, 16,
+ -DST_DCT, -DCT_DST,
+ 18, 20,
-ADST_DCT, -DCT_ADST,
-FLIPADST_DCT, -DCT_FLIPADST,
- 20, 26,
- 22, 24,
+ 24, 30,
+ 26, 28,
-DST_ADST, -ADST_DST,
-DST_FLIPADST, -FLIPADST_DST,
- 28, 30,
+ 32, 34,
-ADST_ADST, -FLIPADST_FLIPADST,
-ADST_FLIPADST, -FLIPADST_ADST,
}, {
-IDTX, 2,
- -DCT_DCT, 4,
- -DST_DST, 6,
- 8, 18,
- 10, 12,
- -DST_DCT, -DCT_DST,
+ -V_DCT, 4,
+ -H_DCT, 6,
+ -DCT_DCT, 8,
+ -DST_DST, 10,
+ 12, 22,
14, 16,
+ -DST_DCT, -DCT_DST,
+ 18, 20,
-ADST_DCT, -DCT_ADST,
-FLIPADST_DCT, -DCT_FLIPADST,
- 20, 26,
- 22, 24,
+ 24, 30,
+ 26, 28,
-DST_ADST, -ADST_DST,
-DST_FLIPADST, -FLIPADST_DST,
- 28, 30,
+ 32, 34,
-ADST_ADST, -FLIPADST_FLIPADST,
-ADST_FLIPADST, -FLIPADST_ADST,
}, {
-IDTX, 2,
- -DCT_DCT, 4,
- 6, 12,
- 8, 10,
+ -V_DCT, 4,
+ -H_DCT, 6,
+ -DCT_DCT, 8,
+ 10, 16,
+ 12, 14,
-ADST_DCT, -DCT_ADST,
-FLIPADST_DCT, -DCT_FLIPADST,
- 14, 16,
+ 18, 20,
-ADST_ADST, -FLIPADST_FLIPADST,
-ADST_FLIPADST, -FLIPADST_ADST
}, {
static const vpx_prob
default_inter_ext_tx_prob[EXT_TX_SETS_INTER][EXT_TX_SIZES][TX_TYPES - 1] = {
{ // ToDo(yaowu): remove unused entry 0.
- { 12, 112, 16, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
- 128 },
- { 12, 112, 16, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
- 128 },
- { 12, 112, 16, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
- 128 },
+ { 12, 15, 15, 112, 16, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 12, 15, 15, 112, 16, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 12, 15, 15, 112, 16, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128 },
#if EXT_TX_SIZES == 4
- { 12, 112, 16, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
- 128 },
+ { 12, 15, 15, 112, 16, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128 },
#endif
}, {
- { 12, 112, 16, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
- 128 },
- { 12, 112, 16, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
- 128 },
- { 12, 112, 16, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
- 128 },
+ { 12, 15, 15, 112, 16, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 12, 15, 15, 112, 16, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 12, 15, 15, 112, 16, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128 },
#if EXT_TX_SIZES == 4
- { 12, 160, 16, 144, 160, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
- 128 },
+ { 12, 15, 15, 160, 16, 144, 160, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128 },
#endif
}, {
- { 12, 112, 128, 128, 128, 128, 128, 128, 128 },
- { 12, 112, 128, 128, 128, 128, 128, 128, 128 },
- { 12, 112, 128, 128, 128, 128, 128, 128, 128 },
+ { 12, 15, 15, 112, 128, 128, 128, 128, 128, 128, 128 },
+ { 12, 15, 15, 112, 128, 128, 128, 128, 128, 128, 128 },
+ { 12, 15, 15, 112, 128, 128, 128, 128, 128, 128, 128 },
#if EXT_TX_SIZES == 4
- { 12, 160, 128, 128, 128, 128, 128, 128, 128 },
+ { 12, 15, 15, 160, 128, 128, 128, 128, 128, 128, 128 },
#endif
}, {
{ 12, },
FLIPADST_DST = 14,
DST_DST = 15,
IDTX = 16,
+ V_DCT = 17,
+ H_DCT = 18,
#endif // CONFIG_EXT_TX
TX_TYPES,
} TX_TYPE;
#endif // CONFIG_VP9_HIGHBITDEPTH
#endif // CONFIG_EXT_TX
-// Inverse identiy transform and add.
+// Inverse identity transform and add.
static void inv_idtx_add_c(const tran_low_t *input, uint8_t *dest, int stride,
- int bs) {
+ int bs, int tx_type) {
int r, c;
const int shift = bs < 32 ? 3 : 2;
+
+ tran_low_t temp_in[32], temp_out[32];
+ transform_2d ht = {idct4_c, idct4_c};
+ int out_scale = 1;
+ int coeff_stride = 0;
+
+ switch (bs) {
+ case 4:
+ ht.cols = idct4_c;
+ ht.rows = idct4_c;
+ out_scale = cospi_16_64 >> 3;
+ coeff_stride = 4;
+ break;
+ case 8:
+ ht.cols = idct8_c;
+ ht.rows = idct8_c;
+ out_scale = (1 << (DCT_CONST_BITS - 4));
+ coeff_stride = 8;
+ break;
+ case 16:
+ ht.cols = idct16_c;
+ ht.rows = idct16_c;
+ out_scale = cospi_16_64 >> 4;
+ coeff_stride = 16;
+ break;
+ case 32:
+ ht.cols = idct32_c;
+ ht.rows = idct32_c;
+ out_scale = (1 << (DCT_CONST_BITS - 4));
+ coeff_stride = 32;
+ break;
+ default:
+ assert(0);
+ }
+
+ // Columns
+ if (tx_type == V_DCT) {
+ for (c = 0; c < bs; ++c) {
+ for (r = 0; r < bs; ++r)
+ temp_in[r] = input[r * coeff_stride + c];
+ ht.cols(temp_in, temp_out);
+
+ for (r = 0; r < bs; ++r) {
+ tran_high_t temp = (tran_high_t)temp_out[r] * out_scale;
+ temp >>= DCT_CONST_BITS;
+ dest[r * stride + c] = clip_pixel_add(dest[r * stride + c],
+ (tran_low_t)temp);
+ }
+ }
+ return;
+ }
+
+ if (tx_type == H_DCT) {
+ for (r = 0; r < bs; ++r) {
+ for (c = 0; c < bs; ++c)
+ temp_in[c] = input[r * coeff_stride + c];
+ ht.rows(temp_in, temp_out);
+
+ for (c = 0; c < bs; ++c) {
+ tran_high_t temp = (tran_high_t)temp_out[c] * out_scale;
+ temp >>= DCT_CONST_BITS;
+ dest[r * stride + c] = clip_pixel_add(dest[r * stride + c],
+ (tran_low_t)temp);
+ }
+ }
+ return;
+ }
+
for (r = 0; r < bs; ++r) {
for (c = 0; c < bs; ++c)
dest[c] = clip_pixel_add(dest[c], input[c] >> shift);
case DST_DCT:
case DST_ADST:
case ADST_DST:
+ case V_DCT:
+ case H_DCT:
break;
case FLIPADST_DCT:
case FLIPADST_ADST:
// Use C version since DST only exists in C code
vp10_iht4x4_16_add_c(input, dest, stride, tx_type);
break;
+ case H_DCT:
+ case V_DCT:
case IDTX:
- inv_idtx_add_c(input, dest, stride, 4);
+ inv_idtx_add_c(input, dest, stride, 4, tx_type);
break;
#endif // CONFIG_EXT_TX
default:
// Use C version since DST only exists in C code
vp10_iht8x8_64_add_c(input, dest, stride, tx_type);
break;
+ case H_DCT:
+ case V_DCT:
case IDTX:
- inv_idtx_add_c(input, dest, stride, 8);
+ inv_idtx_add_c(input, dest, stride, 8, tx_type);
break;
#endif // CONFIG_EXT_TX
default:
// Use C version since DST only exists in C code
vp10_iht16x16_256_add_c(input, dest, stride, tx_type);
break;
+ case H_DCT:
+ case V_DCT:
case IDTX:
- inv_idtx_add_c(input, dest, stride, 16);
+ inv_idtx_add_c(input, dest, stride, 16, tx_type);
break;
#endif // CONFIG_EXT_TX
default:
case DST_FLIPADST:
vp10_iht32x32_1024_add_c(input, dest, stride, tx_type);
break;
+ case H_DCT:
+ case V_DCT:
case IDTX:
- inv_idtx_add_c(input, dest, stride, 32);
+ inv_idtx_add_c(input, dest, stride, 32, tx_type);
break;
#endif // CONFIG_EXT_TX
default:
{default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
{default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
{default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {row_scan_4x4, vp10_row_iscan_4x4, row_scan_4x4_neighbors},
+ {col_scan_4x4, vp10_col_iscan_4x4, col_scan_4x4_neighbors},
}, { // TX_8X8
{default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
{row_scan_8x8, vp10_row_iscan_8x8, row_scan_8x8_neighbors},
{default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
{default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
{default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {row_scan_8x8, vp10_row_iscan_8x8, row_scan_8x8_neighbors},
+ {col_scan_8x8, vp10_col_iscan_8x8, col_scan_8x8_neighbors},
}, { // TX_16X16
{default_scan_16x16, vp10_default_iscan_16x16,
default_scan_16x16_neighbors},
default_scan_16x16_neighbors},
{default_scan_16x16, vp10_default_iscan_16x16,
default_scan_16x16_neighbors},
+ {row_scan_16x16, vp10_row_iscan_16x16, row_scan_16x16_neighbors},
+ {col_scan_16x16, vp10_col_iscan_16x16, col_scan_16x16_neighbors},
}, { // TX_32X32
{default_scan_32x32, vp10_default_iscan_32x32,
default_scan_32x32_neighbors},
qtr_scan_32x32_neighbors},
{default_scan_32x32, vp10_default_iscan_32x32,
default_scan_32x32_neighbors},
+ {h2_scan_32x32, vp10_h2_iscan_32x32,
+ h2_scan_32x32_neighbors},
+ {v2_scan_32x32, vp10_v2_iscan_32x32,
+ v2_scan_32x32_neighbors},
}
};
{default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
{default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
{default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {row_scan_4x4, vp10_row_iscan_4x4, row_scan_4x4_neighbors},
+ {col_scan_4x4, vp10_col_iscan_4x4, col_scan_4x4_neighbors},
}, { // TX_8X8
{default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
{default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
{default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
{default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
{default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {row_scan_8x8, vp10_row_iscan_8x8, row_scan_8x8_neighbors},
+ {col_scan_8x8, vp10_col_iscan_8x8, col_scan_8x8_neighbors},
}, { // TX_16X16
{default_scan_16x16, vp10_default_iscan_16x16,
default_scan_16x16_neighbors},
default_scan_16x16_neighbors},
{default_scan_16x16, vp10_default_iscan_16x16,
default_scan_16x16_neighbors},
+ {row_scan_16x16, vp10_row_iscan_16x16, row_scan_16x16_neighbors},
+ {col_scan_16x16, vp10_col_iscan_16x16, col_scan_16x16_neighbors},
}, { // TX_32X32
{default_scan_32x32, vp10_default_iscan_32x32,
default_scan_32x32_neighbors},
qtr_scan_32x32_neighbors},
{default_scan_32x32, vp10_default_iscan_32x32,
default_scan_32x32_neighbors},
+ {h2_scan_32x32, vp10_h2_iscan_32x32,
+ h2_scan_32x32_neighbors},
+ {v2_scan_32x32, vp10_v2_iscan_32x32,
+ v2_scan_32x32_neighbors},
}
};
specialize qw/vp10_fwht4x4 msa/, "$mmx_x86inc";
}
+add_proto qw/void vp10_fwd_idtx/, "const int16_t *src_diff, tran_low_t *coeff, int stride, int bs, int tx_type";
+ specialize qw/vp10_fwd_idtx/;
+
# Inverse transform
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
# Note as optimized versions of these functions are added we need to add a check to ensure
case DST_DCT:
case DST_ADST:
case ADST_DST:
+ case H_DCT:
+ case V_DCT:
break;
case FLIPADST_DCT:
case FLIPADST_ADST:
}
}
+// Forward identity transform, extended with two 1-D modes: V_DCT runs the fdctN kernel down every column, H_DCT runs it along every row, and any other tx_type takes the scaled-copy (identity) path at the bottom. Reads a bs x bs block from src_diff (row pitch = stride) and writes bs x bs coefficients to coeff, packed bs per row.
+void vp10_fwd_idtx_c(const int16_t *src_diff,
+ tran_low_t *coeff, int stride,
+ int bs, int tx_type) {
+ int r, c;
+ const int shift = bs < 32 ? 3 : 2;  // identity-path gain: x8 when bs < 32, x4 otherwise
+
+ const int16_t *input = src_diff;  // fixed aliases for the DCT paths; src_diff/coeff themselves are advanced only in the identity loop
+ tran_low_t *output = coeff;
+
+ int i, j;
+ tran_low_t temp_in[32], temp_out[32];  // scratch for one row or column; 32 is the largest bs the switch accepts
+ transform_2d ht = {fdct4, fdct4};  // placeholder, overwritten for every supported bs (cols and rows always get the same kernel)
+ int in_scale = 1;
+ int out_scale = 1;
+ int coeff_stride = 0;
+
+ switch (bs) {  // choose the 1-D kernel plus the input/output scaling for this block size
+ case 4:
+ ht.cols = fdct4;
+ ht.rows = fdct4;
+ in_scale = 16;
+ out_scale = cospi_16_64 >> 1;
+ coeff_stride = 4;  // coeff_stride always equals bs: the output block is stored packed
+ break;
+ case 8:
+ ht.cols = fdct8;
+ ht.rows = fdct8;
+ in_scale = 4;
+ out_scale = (1 << DCT_CONST_BITS);  // cancels the >> DCT_CONST_BITS below, i.e. no extra output scaling
+ coeff_stride = 8;
+ break;
+ case 16:
+ ht.cols = fdct16;
+ ht.rows = fdct16;
+ in_scale = 4;
+ out_scale = cospi_16_64;
+ coeff_stride = 16;
+ break;
+ case 32:
+ ht.cols = fdct32;
+ ht.rows = fdct32;
+ in_scale = 4;
+ out_scale = (1 << (DCT_CONST_BITS - 2));  // net effect after the shift below is a divide by 4
+ coeff_stride = 32;
+ break;
+ default:
+ assert(0);  // only block sizes 4, 8, 16 and 32 are supported
+ }
+
+ // Columns
+ if (tx_type == V_DCT) {
+ for (i = 0; i < bs; ++i) {  // i = column index
+ for (j = 0; j < bs; ++j)
+ temp_in[j] = input[j * stride + i] * in_scale;  // gather column i, pre-scaled
+ ht.cols(temp_in, temp_out);
+
+ for (j = 0; j < bs; ++j) {
+ tran_high_t temp = (tran_high_t)temp_out[j] * out_scale;  // widen before multiplying
+ temp >>= DCT_CONST_BITS;  // out_scale acts as a fixed-point factor with DCT_CONST_BITS fractional bits
+ output[j * coeff_stride + i] = (tran_low_t)temp;  // scatter back into column i
+ }
+ }
+ return;
+ }
+
+ // Rows
+ if (tx_type == H_DCT) {
+ for (j = 0; j < bs; ++j) {  // j = row index
+ for (i = 0; i < bs; ++i)
+ temp_in[i] = input[j * stride + i] * in_scale;  // load row j, pre-scaled
+ ht.rows(temp_in, temp_out);
+
+ for (i = 0; i < bs; ++i) {
+ tran_high_t temp = (tran_high_t)temp_out[i] * out_scale;
+ temp >>= DCT_CONST_BITS;  // same fixed-point output scaling as the column path
+ output[j * coeff_stride + i] = (tran_low_t)temp;
+ }
+ }
+ return;
+ }
+
+ for (r = 0; r < bs; ++r) {  // identity path: reached for every tx_type other than V_DCT/H_DCT
+ for (c = 0; c < bs; ++c) coeff[c] = src_diff[c] << shift;  // NOTE(review): left-shifting a negative value is undefined in ISO C; multiplying by (1 << shift) would avoid that - confirm before changing
+ src_diff += stride;  // next source row
+ coeff += bs;  // next packed output row
+ }
+}
+
#if CONFIG_VP9_HIGHBITDEPTH
void vp10_highbd_fht32x32_c(const int16_t *input, tran_low_t *output,
int stride, int tx_type) {
}
#endif // CONFIG_VP9_HIGHBITDEPTH
-#if CONFIG_EXT_TX
-// Forward identity transform.
-static void fwd_idtx_c(const int16_t *src_diff, tran_low_t *coeff, int stride,
- int bs) {
- int r, c;
- const int shift = bs < 32 ? 3 : 2;
-
- for (r = 0; r < bs; ++r) {
- for (c = 0; c < bs; ++c) coeff[c] = src_diff[c] << shift;
- src_diff += stride;
- coeff += bs;
- }
-}
-#endif // CONFIG_EXT_TX
-
void vp10_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TX_TYPE tx_type, int lossless) {
if (lossless) {
case FLIPADST_DST:
vp10_fht4x4(src_diff, coeff, diff_stride, tx_type);
break;
+ case H_DCT:
+ case V_DCT:
case IDTX:
- fwd_idtx_c(src_diff, coeff, diff_stride, 4);
+ vp10_fwd_idtx_c(src_diff, coeff, diff_stride, 4, tx_type);
break;
#endif // CONFIG_EXT_TX
default:
case FLIPADST_DST:
vp10_fht8x8(src_diff, coeff, diff_stride, tx_type);
break;
+ case H_DCT:
+ case V_DCT:
case IDTX:
- fwd_idtx_c(src_diff, coeff, diff_stride, 8);
+ vp10_fwd_idtx_c(src_diff, coeff, diff_stride, 8, tx_type);
break;
#endif // CONFIG_EXT_TX
default:
// Use C version since DST exists only in C
vp10_fht16x16_c(src_diff, coeff, diff_stride, tx_type);
break;
+ case H_DCT:
+ case V_DCT:
case IDTX:
- fwd_idtx_c(src_diff, coeff, diff_stride, 16);
+ vp10_fwd_idtx_c(src_diff, coeff, diff_stride, 16, tx_type);
break;
#endif // CONFIG_EXT_TX
default:
case FLIPADST_DST:
vp10_fht32x32_c(src_diff, coeff, diff_stride, tx_type);
break;
+ case H_DCT:
+ case V_DCT:
case IDTX:
- fwd_idtx_c(src_diff, coeff, diff_stride, 32);
+ vp10_fwd_idtx_c(src_diff, coeff, diff_stride, 32, tx_type);
break;
#endif // CONFIG_EXT_TX
default:
vp10_highbd_fht4x4_c(src_diff, coeff, diff_stride, tx_type);
break;
case IDTX:
- fwd_idtx_c(src_diff, coeff, diff_stride, 4);
+ vp10_fwd_idtx_c(src_diff, coeff, diff_stride, 4, tx_type);
break;
#endif // CONFIG_EXT_TX
default:
vp10_highbd_fht8x8_c(src_diff, coeff, diff_stride, tx_type);
break;
case IDTX:
- fwd_idtx_c(src_diff, coeff, diff_stride, 8);
+ vp10_fwd_idtx_c(src_diff, coeff, diff_stride, 8, tx_type);
break;
#endif // CONFIG_EXT_TX
default:
vp10_highbd_fht16x16_c(src_diff, coeff, diff_stride, tx_type);
break;
case IDTX:
- fwd_idtx_c(src_diff, coeff, diff_stride, 16);
+ vp10_fwd_idtx_c(src_diff, coeff, diff_stride, 16, tx_type);
break;
#endif // CONFIG_EXT_TX
default:
vp10_highbd_fht32x32_c(src_diff, coeff, diff_stride, tx_type);
break;
case IDTX:
- fwd_idtx_c(src_diff, coeff, diff_stride, 32);
+ vp10_fwd_idtx_c(src_diff, coeff, diff_stride, 32, tx_type);
break;
#endif // CONFIG_EXT_TX
default: