Add 64x64 (CONFIG_TX64X64) support to the higher-level fwd and inv transform functions.
Change-Id: I91518250a0be7d94aada7519f6c9e7ed024574fb
add_proto qw/void av1_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type";
specialize qw/av1_iht16x16_256_add sse2 avx2/;
+
+ add_proto qw/void av1_iht32x32_1024_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type";
+ specialize qw/av1_iht32x32_1024_add/;
}
} else {
# Force C versions if CONFIG_EMULATE_HARDWARE is 1
add_proto qw/void av1_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type";
specialize qw/av1_iht16x16_256_add/;
+
+ add_proto qw/void av1_iht32x32_1024_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type";
+ specialize qw/av1_iht32x32_1024_add/;
+
} else {
add_proto qw/void av1_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
specialize qw/av1_iht4x4_16_add sse2 neon dspr2/;
add_proto qw/void av1_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type";
specialize qw/av1_iht16x16_256_add sse2 avx2 dspr2/;
+ add_proto qw/void av1_iht32x32_1024_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type";
+ specialize qw/av1_iht32x32_1024_add/;
+
if (aom_config("CONFIG_EXT_TX") ne "yes") {
specialize qw/av1_iht4x4_16_add msa/;
specialize qw/av1_iht8x8_64_add msa/;
}
}
}
+add_proto qw/void av1_iht32x32_1024_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type";
+specialize qw/av1_iht32x32_1024_add/;
+
+if (aom_config("CONFIG_TX64X64") eq "yes") {
+ add_proto qw/void av1_iht64x64_4096_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type";
+ specialize qw/av1_iht64x64_4096_add/;
+}
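
Since specialize lists no SIMD variants for the new entry points, the generated av1_rtcd.h falls back to the C implementations. Roughly, the generated header will contain the following (an illustrative sketch; the exact output depends on the rtcd script):

  /* Sketch of what rtcd emits when no SIMD specializations are given: */
  void av1_iht64x64_4096_add_c(const tran_low_t *input, uint8_t *output,
                               int pitch, int tx_type);
  #define av1_iht64x64_4096_add av1_iht64x64_4096_add_c
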
if (aom_config("CONFIG_NEW_QUANT") eq "yes") {
add_proto qw/void quantize_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
#if CONFIG_CB4X4
TX_2X2, // 2x2 transform
#endif
- TX_4X4, // 4x4 transform
- TX_8X8, // 8x8 transform
- TX_16X16, // 16x16 transform
- TX_32X32, // 32x32 transform
- TX_4X8, // 4x8 transform
- TX_8X4, // 8x4 transform
- TX_8X16, // 8x16 transform
- TX_16X8, // 16x8 transform
- TX_16X32, // 16x32 transform
- TX_32X16, // 32x16 transform
- TX_SIZES_ALL, // Includes rectangular transforms
- TX_SIZES = TX_32X32 + 1, // Does NOT include rectangular transforms
- TX_INVALID = 255 // Invalid transform size
+ TX_4X4, // 4x4 transform
+ TX_8X8, // 8x8 transform
+ TX_16X16, // 16x16 transform
+ TX_32X32, // 32x32 transform
+#if CONFIG_TX64X64
+ TX_64X64, // 64x64 transform
+#endif // CONFIG_TX64X64
+ TX_4X8, // 4x8 transform
+ TX_8X4, // 8x4 transform
+ TX_8X16, // 8x16 transform
+ TX_16X8, // 16x8 transform
+ TX_16X32, // 16x32 transform
+ TX_32X16, // 32x16 transform
+#if 0 // CONFIG_TX64X64
+ // TODO(debargha): To be enabled later
+ TX_32X64, // 32x64 transform
+ TX_64X32, // 64x32 transform
+#endif // CONFIG_TX64X64
+ TX_SIZES_ALL, // Includes rectangular transforms
+ TX_SIZES = TX_4X8, // Does NOT include rectangular transforms
+ TX_INVALID = 255 // Invalid transform size
} TX_SIZE;
#define MAX_TX_DEPTH (TX_32X32 - TX_4X4)
-#define MAX_TX_SIZE_LOG2 5
+#define MAX_TX_SIZE_LOG2 (5 + CONFIG_TX64X64)
#define MAX_TX_SIZE (1 << MAX_TX_SIZE_LOG2)
#define MIN_TX_SIZE_LOG2 2
#define MIN_TX_SIZE (1 << MIN_TX_SIZE_LOG2)
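
With CONFIG_TX64X64 defined to 1, MAX_TX_SIZE_LOG2 evaluates to 6 and MAX_TX_SIZE to 64; with it defined to 0 the previous values 5 and 32 are unchanged. A compile-time check (illustrative, not part of the patch) can pin this down:

  /* Negative-array-size trick: fails to compile if the sizes drift. */
  #if CONFIG_TX64X64
  typedef char assert_max_tx_size[MAX_TX_SIZE == 64 ? 1 : -1];
  #else
  typedef char assert_max_tx_size[MAX_TX_SIZE == 32 ? 1 : -1];
  #endif
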
int get_tx_scale(const MACROBLOCKD *const xd, const TX_TYPE tx_type,
const TX_SIZE tx_size) {
(void)tx_type;
-#if CONFIG_AOM_HIGHBITDEPTH
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- return txsize_sqr_up_map[tx_size] == TX_32X32;
- }
-#else
(void)xd;
-#endif
- return txsize_sqr_up_map[tx_size] == TX_32X32;
+ if (txsize_sqr_up_map[tx_size] == TX_32X32) return 1;
+#if CONFIG_TX64X64
+ else if (txsize_sqr_up_map[tx_size] == TX_64X64)
+ return 2;
+#endif // CONFIG_TX64X64
+ else
+ return 0;
}
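
The scale returned here compensates for the extra fixed-point gain the larger transforms carry: one bit for 32x32, two bits for 64x64. A hypothetical caller (dequant_with_scale and its signature are illustrative, not from this patch) would apply it as a right shift while dequantizing:

  /* Sketch: fold the tx scale into dequantization (hypothetical helper). */
  static void dequant_with_scale(const tran_low_t *qcoeff,
                                 const int16_t dequant[2], /* [0]=DC [1]=AC */
                                 tran_low_t *dqcoeff, int n, int scale) {
    int i;
    for (i = 0; i < n; ++i)
      dqcoeff[i] = (tran_low_t)((qcoeff[i] * dequant[i != 0]) >> scale);
  }
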
// NOTE: The implementations of all inverses need to be aware of the fact
int i;
for (i = 0; i < 32; ++i) output[i] = input[i] * 4;
}
+
+#if CONFIG_TX64X64
+static void iidtx64_c(const tran_low_t *input, tran_low_t *output) {
+ int i;
+ for (i = 0; i < 64; ++i)
+ output[i] = (tran_low_t)dct_const_round_shift(input[i] * 4 * Sqrt2);
+}
+#endif // CONFIG_TX64X64
#endif // CONFIG_EXT_TX
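
For reference, the N-point identity stages scale by sqrt(N/2) so their gain matches the DCT stages they substitute for (compare iidtx32_c above, which multiplies by 4 = sqrt(32/2)). For N = 64 the factor is sqrt(32) = 4*sqrt(2), which is irrational and therefore applied in fixed point. Assuming Sqrt2 is the Q14 constant 23170 and dct_const_round_shift() rounds off DCT_CONST_BITS = 14 bits, as in the other 1-D stages:

  /* Worked gain check for iidtx64_c (constants assumed, see lead-in): */
  /*   out = round(in * 4 * 23170 / (1 << 14))                         */
  /*       ~= in * 5.6569 = in * sqrt(64 / 2)                          */
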
// For use in lieu of ADST
for (i = 0; i < 64; ++i) output[i] = (tran_low_t)out[i];
}
-static void iidtx64_c(const tran_low_t *input, tran_low_t *output) {
- int i;
- for (i = 0; i < 64; ++i)
- output[i] = (tran_low_t)dct_const_round_shift(input[i] * 4 * Sqrt2);
-}
-
// For use in lieu of ADST
static void ihalfright64_c(const tran_low_t *input, tran_low_t *output) {
int i;
output[i] =
HIGHBD_WRAPLOW(highbd_dct_const_round_shift(input[i] * 4 * Sqrt2), bd);
}
+#endif // CONFIG_TX64X64
+#endif // CONFIG_EXT_TX
+#if CONFIG_TX64X64
// For use in lieu of ADST
static void highbd_ihalfright64_c(const tran_low_t *input, tran_low_t *output,
int bd) {
for (i = 0; i < 64; ++i) output[i] = (tran_low_t)out[i];
}
#endif // CONFIG_TX64X64
-#endif // CONFIG_EXT_TX
#endif // CONFIG_AOM_HIGHBITDEPTH
// Inverse identity transform and add.
static void inv_idtx_add_c(const tran_low_t *input, uint8_t *dest, int stride,
int bs, int tx_type) {
int r, c;
- const int shift = bs < 32 ? 3 : 2;
+ const int shift = bs < 32 ? 3 : (bs < 64 ? 2 : 1);
if (tx_type == IDTX) {
for (r = 0; r < bs; ++r) {
for (c = 0; c < bs; ++c)
}
}
}
+#endif // CONFIG_EXT_TX
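
The widened shift schedule tracks the identity gains directly: the 2-D 64-point pass carries (4*sqrt(2) / 4)^2 = 2x the gain of the 32-point pass, i.e. one extra bit, so the final rounding shift drops by one more step:

  /* Residual-add shift for IDTX, per the table above:              */
  /*   bs < 32  -> shift 3                                          */
  /*   bs == 32 -> shift 2  (2x the 16-point 2-D gain: one bit)     */
  /*   bs == 64 -> shift 1  (2x the 32-point 2-D gain: one bit)     */
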
#if CONFIG_TX64X64
void av1_iht64x64_4096_add_c(const tran_low_t *input, uint8_t *dest, int stride,
{ ihalfright64_c, idct64_row_c }, // ADST_DCT
{ idct64_col_c, ihalfright64_c }, // DCT_ADST
{ ihalfright64_c, ihalfright64_c }, // ADST_ADST
+#if CONFIG_EXT_TX
{ ihalfright64_c, idct64_row_c }, // FLIPADST_DCT
{ idct64_col_c, ihalfright64_c }, // DCT_FLIPADST
{ ihalfright64_c, ihalfright64_c }, // FLIPADST_FLIPADST
{ iidtx64_c, ihalfright64_c }, // H_ADST
{ ihalfright64_c, iidtx64_c }, // V_FLIPADST
{ iidtx64_c, ihalfright64_c }, // H_FLIPADST
+#endif // CONFIG_EXT_TX
};
int i, j;
IHT_64[tx_type].cols(out[i], out[i]);
}
+#if CONFIG_EXT_TX
maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 64, 64);
+#endif // CONFIG_EXT_TX
// Sum with the destination
for (i = 0; i < 64; ++i) {
}
}
#endif // CONFIG_TX64X64
-#endif // CONFIG_EXT_TX
// idct
void av1_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
aom_idct32x32_1024_add(input, dest, stride);
}
+#if CONFIG_TX64X64
+void av1_idct64x64_add(const tran_low_t *input, uint8_t *dest, int stride,
+ int eob) {
+ (void)eob;
+ av1_iht64x64_4096_add(input, dest, stride, DCT_DCT);
+}
+#endif // CONFIG_TX64X64
+
void av1_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest, int stride,
int eob, TX_TYPE tx_type, int lossless) {
if (lossless) {
}
}
+#if CONFIG_TX64X64
+void av1_inv_txfm_add_64x64(const tran_low_t *input, uint8_t *dest, int stride,
+ int eob, TX_TYPE tx_type) {
+ switch (tx_type) {
+ case DCT_DCT: av1_idct64x64_add(input, dest, stride, eob); break;
+#if CONFIG_EXT_TX
+ case ADST_DCT:
+ case DCT_ADST:
+ case ADST_ADST:
+ case FLIPADST_DCT:
+ case DCT_FLIPADST:
+ case FLIPADST_FLIPADST:
+ case ADST_FLIPADST:
+ case FLIPADST_ADST:
+ case V_DCT:
+ case H_DCT:
+ case V_ADST:
+ case H_ADST:
+ case V_FLIPADST:
+ case H_FLIPADST:
+ av1_iht64x64_4096_add_c(input, dest, stride, tx_type);
+ break;
+ case IDTX: inv_idtx_add_c(input, dest, stride, 64, tx_type); break;
+#endif // CONFIG_EXT_TX
+ default: assert(0); break;
+ }
+}
+#endif // CONFIG_TX64X64
+
#if CONFIG_AOM_HIGHBITDEPTH
void av1_highbd_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,
int stride, int tx_type, int bd) {
}
}
}
+#endif // CONFIG_EXT_TX
#if CONFIG_TX64X64
void av1_highbd_iht64x64_4096_add_c(const tran_low_t *input, uint8_t *dest8,
{ highbd_ihalfright64_c, highbd_idct64_row_c }, // ADST_DCT
{ highbd_idct64_col_c, highbd_ihalfright64_c }, // DCT_ADST
{ highbd_ihalfright64_c, highbd_ihalfright64_c }, // ADST_ADST
+#if CONFIG_EXT_TX
{ highbd_ihalfright64_c, highbd_idct64_row_c }, // FLIPADST_DCT
{ highbd_idct64_col_c, highbd_ihalfright64_c }, // DCT_FLIPADST
{ highbd_ihalfright64_c, highbd_ihalfright64_c }, // FLIPADST_FLIPADST
{ highbd_iidtx64_c, highbd_ihalfright64_c }, // H_ADST
{ highbd_ihalfright64_c, highbd_iidtx64_c }, // V_FLIPADST
{ highbd_iidtx64_c, highbd_ihalfright64_c }, // H_FLIPADST
+#endif // CONFIG_EXT_TX
};
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
HIGH_IHT_64[tx_type].cols(out[i], out[i], bd);
}
+#if CONFIG_EXT_TX
maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, 64, 64);
+#endif // CONFIG_EXT_TX
// Sum with the destination
for (i = 0; i < 64; ++i) {
}
}
#endif // CONFIG_TX64X64
-#endif // CONFIG_EXT_TX
// idct
void av1_highbd_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
default: assert(0); break;
}
}
+
+#if CONFIG_TX64X64
+void av1_highbd_inv_txfm_add_64x64(const tran_low_t *input, uint8_t *dest,
+ int stride, int eob, int bd,
+ TX_TYPE tx_type) {
+ (void)eob;
+ switch (tx_type) {
+ case DCT_DCT:
+ av1_inv_txfm2d_add_64x64(input, CONVERT_TO_SHORTPTR(dest), stride,
+ DCT_DCT, bd);
+ break;
+#if CONFIG_EXT_TX
+ case ADST_DCT:
+ case DCT_ADST:
+ case ADST_ADST:
+ case FLIPADST_DCT:
+ case DCT_FLIPADST:
+ case FLIPADST_FLIPADST:
+ case ADST_FLIPADST:
+ case FLIPADST_ADST:
+ case V_DCT:
+ case H_DCT:
+ case V_ADST:
+ case H_ADST:
+ case V_FLIPADST:
+ case H_FLIPADST:
+ av1_highbd_iht64x64_4096_add_c(input, dest, stride, tx_type, bd);
+ break;
+ case IDTX:
+ highbd_inv_idtx_add_c(input, dest, stride, 64, tx_type, bd);
+ break;
+#endif // CONFIG_EXT_TX
+ default: assert(0); break;
+ }
+}
+#endif // CONFIG_TX64X64
#endif // CONFIG_AOM_HIGHBITDEPTH
void inv_txfm_add(const tran_low_t *input, uint8_t *dest, int stride,
const int lossless = inv_txfm_param->lossless;
switch (tx_size) {
+#if CONFIG_TX64X64
+ case TX_64X64:
+ av1_inv_txfm_add_64x64(input, dest, stride, eob, tx_type);
+ break;
+#endif // CONFIG_TX64X64
case TX_32X32:
av1_inv_txfm_add_32x32(input, dest, stride, eob, tx_type);
break;
const int lossless = inv_txfm_param->lossless;
switch (tx_size) {
+#if CONFIG_TX64X64
+ case TX_64X64:
+ av1_highbd_inv_txfm_add_64x64(input, dest, stride, eob, bd, tx_type);
+ break;
+#endif // CONFIG_TX64X64
case TX_32X32:
av1_highbd_inv_txfm_add_32x32(input, dest, stride, eob, bd, tx_type);
break;
av1_fht32x32(src, dst, src_stride, DCT_DCT);
}
+#if CONFIG_TX64X64
+static INLINE void fdct64x64(const int16_t *src, tran_low_t *dst,
+ int src_stride) {
+ av1_fht64x64(src, dst, src_stride, DCT_DCT);
+}
+
+static INLINE void fdct64x64_1(const int16_t *src, tran_low_t *dst,
+ int src_stride) {
+ int i, j;
+ int32_t sum = 0;
+ memset(dst, 0, sizeof(*dst) * 4096);
+ for (i = 0; i < 64; ++i)
+ for (j = 0; j < 64; ++j) sum += src[i * src_stride + j];
+ // Note: this scaling makes the transform 2 times unitary
+ dst[0] = ROUND_POWER_OF_TWO_SIGNED(sum, 5);
+}
+#endif // CONFIG_TX64X64
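
The "2 times unitary" note checks out by hand: a constant residual d sums to 64 * 64 * d = 4096 * d, and ROUND_POWER_OF_TWO_SIGNED(sum, 5) divides by 32, leaving 128 * d, whereas an orthonormal 64x64 transform would place 64 * d in the DC bin:

  /* Worked DC-gain arithmetic for fdct64x64_1 (not new code): */
  /*   sum    = 64 * 64 * d     = 4096 * d                     */
  /*   dst[0] = round(sum / 32) = 128 * d = 2 * (64 * d)       */
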
+
static void fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TX_TYPE tx_type, int lossless) {
if (lossless) {
}
}
+#if CONFIG_TX64X64
+static void fwd_txfm_64x64(const int16_t *src_diff, tran_low_t *coeff,
+ int diff_stride, TX_TYPE tx_type,
+ FWD_TXFM_OPT fwd_txfm_opt) {
+ switch (tx_type) {
+ case DCT_DCT:
+ if (fwd_txfm_opt == FWD_TXFM_OPT_NORMAL)
+ fdct64x64(src_diff, coeff, diff_stride);
+ else // FWD_TXFM_OPT_DC
+ fdct64x64_1(src_diff, coeff, diff_stride);
+ break;
+#if CONFIG_EXT_TX
+ case ADST_DCT:
+ case DCT_ADST:
+ case ADST_ADST:
+ case FLIPADST_DCT:
+ case DCT_FLIPADST:
+ case FLIPADST_FLIPADST:
+ case ADST_FLIPADST:
+ case FLIPADST_ADST:
+ av1_fht64x64(src_diff, coeff, diff_stride, tx_type);
+ break;
+ case V_DCT:
+ case H_DCT:
+ case V_ADST:
+ case H_ADST:
+ case V_FLIPADST:
+ case H_FLIPADST: av1_fht64x64(src_diff, coeff, diff_stride, tx_type); break;
+ case IDTX: av1_fwd_idtx_c(src_diff, coeff, diff_stride, 64, tx_type); break;
+#endif // CONFIG_EXT_TX
+ default: assert(0); break;
+ }
+}
+#endif // CONFIG_TX64X64
+
#if CONFIG_AOM_HIGHBITDEPTH
static void highbd_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TX_TYPE tx_type, int lossless,
default: assert(0); break;
}
}
+
+#if CONFIG_TX64X64
+static void highbd_fwd_txfm_64x64(const int16_t *src_diff, tran_low_t *coeff,
+ int diff_stride, TX_TYPE tx_type,
+ FWD_TXFM_OPT fwd_txfm_opt, const int bd) {
+ (void)fwd_txfm_opt;
+ (void)bd;
+ switch (tx_type) {
+ case DCT_DCT:
+ av1_highbd_fht64x64_c(src_diff, coeff, diff_stride, tx_type);
+ break;
+#if CONFIG_EXT_TX
+ case ADST_DCT:
+ case DCT_ADST:
+ case ADST_ADST:
+ case FLIPADST_DCT:
+ case DCT_FLIPADST:
+ case FLIPADST_FLIPADST:
+ case ADST_FLIPADST:
+ case FLIPADST_ADST:
+ case V_DCT:
+ case H_DCT:
+ case V_ADST:
+ case H_ADST:
+ case V_FLIPADST:
+ case H_FLIPADST:
+ av1_highbd_fht64x64_c(src_diff, coeff, diff_stride, tx_type);
+ break;
+ case IDTX: av1_fwd_idtx_c(src_diff, coeff, diff_stride, 64, tx_type); break;
+#endif // CONFIG_EXT_TX
+ default: assert(0); break;
+ }
+}
+#endif // CONFIG_TX64X64
#endif // CONFIG_AOM_HIGHBITDEPTH
void fwd_txfm(const int16_t *src_diff, tran_low_t *coeff, int diff_stride,
const int rd_transform = fwd_txfm_param->rd_transform;
const int lossless = fwd_txfm_param->lossless;
switch (tx_size) {
+#if CONFIG_TX64X64
+ case TX_64X64:
+ fwd_txfm_64x64(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt);
+ break;
+#endif // CONFIG_TX64X64
case TX_32X32:
fwd_txfm_32x32(rd_transform, src_diff, coeff, diff_stride, tx_type,
fwd_txfm_opt);
const int lossless = fwd_txfm_param->lossless;
const int bd = fwd_txfm_param->bd;
switch (tx_size) {
+#if CONFIG_TX64X64
+ case TX_64X64:
+ highbd_fwd_txfm_64x64(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt,
+ bd);
+ break;
+#endif // CONFIG_TX64X64
case TX_32X32:
highbd_fwd_txfm_32x32(rd_transform, src_diff, coeff, diff_stride, tx_type,
fwd_txfm_opt, bd);
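
End to end, the encoder reaches the new 64x64 paths through these dispatchers. A hypothetical call site (the FWD_TXFM_PARAM field names are inferred from the reads above, so treat the struct usage as illustrative):

  FWD_TXFM_PARAM param = { 0 };
  param.tx_type = DCT_DCT;
  param.tx_size = TX_64X64; /* routed to fwd_txfm_64x64() above */
  param.fwd_txfm_opt = FWD_TXFM_OPT_NORMAL;
  param.lossless = 0;
  fwd_txfm(src_diff, coeff, diff_stride, &param);
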