From 3a8c43a479bf0ab77c30d9afeebd383255d12efe Mon Sep 17 00:00:00 2001 From: Peter de Rivaz Date: Fri, 24 Oct 2014 08:37:39 +0100 Subject: [PATCH] Refactored idct routines and headers This change is made in preparation for a subsequent patch which adds acceleration for the highbitdepth transform functions. The highbitdepth transform functions attempt to use 16/32-bit SSE instructions where possible, but fall back to using the C implementations if potential overflow is detected. For this reason the dct routines are made global so they can be called from the acceleration functions in the subsequent patch. Change-Id: Ia921f191bf6936ccba4f13e8461624b120c1f665 (cherry picked from commit 454342d4e77dbb67f4a3c10f97a57a6fcb46d9a0) --- vp9/common/vp9_idct.c | 78 +++++++++++++------------------------------ vp9/common/vp9_idct.h | 30 +++++++++++++++++ vp9/encoder/vp9_dct.c | 48 ++++++++------------------ vp9/encoder/vp9_dct.h | 61 +++++++++++++++++++++++++++++++++ vp9/vp9cx.mk | 1 + 5 files changed, 130 insertions(+), 88 deletions(-) create mode 100644 vp9/encoder/vp9_dct.h diff --git a/vp9/common/vp9_idct.c b/vp9/common/vp9_idct.c index b2c78c790..1a3fefc5f 100644 --- a/vp9/common/vp9_idct.c +++ b/vp9/common/vp9_idct.c @@ -11,39 +11,9 @@ #include #include "./vp9_rtcd.h" -#include "vp9/common/vp9_systemdependent.h" #include "vp9/common/vp9_blockd.h" #include "vp9/common/vp9_idct.h" - -#if CONFIG_EMULATE_HARDWARE -// When CONFIG_EMULATE_HARDWARE is 1 the transform performs a -// non-normative method to handle overflows. A stream that causes -// overflows in the inverse transform is considered invalid in VP9, -// and a hardware implementer is free to choose any reasonable -// method to handle overflows. However to aid in hardware -// verification they can use a specific implementation of the -// WRAPLOW() macro below that is identical to their intended -// hardware implementation (and also use configure options to trigger -// the C-implementation of the transform). 
-// -// The particular WRAPLOW implementation below performs strict -// overflow wrapping to match common hardware implementations. -// bd of 8 uses trans_low with 16bits, need to remove 16bits -// bd of 10 uses trans_low with 18bits, need to remove 14bits -// bd of 12 uses trans_low with 20bits, need to remove 12bits -// bd of x uses trans_low with 8+x bits, need to remove 24-x bits -#define WRAPLOW(x, bd) ((((int32_t)(x)) << (24 - bd)) >> (24 - bd)) -#else -#define WRAPLOW(x, bd) ((int32_t)(x)) -#endif // CONFIG_EMULATE_HARDWARE - -#if CONFIG_VP9_HIGHBITDEPTH -static INLINE uint16_t highbd_clip_pixel_add(uint16_t dest, tran_high_t trans, - int bd) { - trans = WRAPLOW(trans, bd); - return clip_pixel_highbd(WRAPLOW(dest + trans, bd), bd); -} -#endif // CONFIG_VP9_HIGHBITDEPTH +#include "vp9/common/vp9_systemdependent.h" static INLINE uint8_t clip_pixel_add(uint8_t dest, tran_high_t trans) { trans = WRAPLOW(trans, 8); @@ -1540,7 +1510,7 @@ void vp9_highbd_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest8, } } -static void highbd_idct4(const tran_low_t *input, tran_low_t *output, int bd) { +void vp9_highbd_idct4(const tran_low_t *input, tran_low_t *output, int bd) { tran_low_t step[4]; tran_high_t temp1, temp2; (void) bd; @@ -1571,7 +1541,7 @@ void vp9_highbd_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, // Rows for (i = 0; i < 4; ++i) { - highbd_idct4(input, outptr, bd); + vp9_highbd_idct4(input, outptr, bd); input += 4; outptr += 4; } @@ -1580,7 +1550,7 @@ void vp9_highbd_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, for (i = 0; i < 4; ++i) { for (j = 0; j < 4; ++j) temp_in[j] = out[j * 4 + i]; - highbd_idct4(temp_in, temp_out, bd); + vp9_highbd_idct4(temp_in, temp_out, bd); for (j = 0; j < 4; ++j) { dest[j * stride + i] = highbd_clip_pixel_add( dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 4), bd); @@ -1607,7 +1577,7 @@ void vp9_highbd_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest8, } } -static void highbd_idct8(const 
tran_low_t *input, tran_low_t *output, int bd) { +void vp9_highbd_idct8(const tran_low_t *input, tran_low_t *output, int bd) { tran_low_t step1[8], step2[8]; tran_high_t temp1, temp2; // stage 1 @@ -1625,7 +1595,7 @@ static void highbd_idct8(const tran_low_t *input, tran_low_t *output, int bd) { step1[6] = WRAPLOW(dct_const_round_shift(temp2), bd); // stage 2 & stage 3 - even half - highbd_idct4(step1, step1, bd); + vp9_highbd_idct4(step1, step1, bd); // stage 2 - odd half step2[4] = WRAPLOW(step1[4] + step1[5], bd); @@ -1662,7 +1632,7 @@ void vp9_highbd_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest8, // First transform rows. for (i = 0; i < 8; ++i) { - highbd_idct8(input, outptr, bd); + vp9_highbd_idct8(input, outptr, bd); input += 8; outptr += 8; } @@ -1671,7 +1641,7 @@ void vp9_highbd_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest8, for (i = 0; i < 8; ++i) { for (j = 0; j < 8; ++j) temp_in[j] = out[j * 8 + i]; - highbd_idct8(temp_in, temp_out, bd); + vp9_highbd_idct8(temp_in, temp_out, bd); for (j = 0; j < 8; ++j) { dest[j * stride + i] = highbd_clip_pixel_add( dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd); @@ -1735,9 +1705,9 @@ static void highbd_iadst4(const tran_low_t *input, tran_low_t *output, int bd) { void vp9_highbd_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, int stride, int tx_type, int bd) { const highbd_transform_2d IHT_4[] = { - { highbd_idct4, highbd_idct4 }, // DCT_DCT = 0 - { highbd_iadst4, highbd_idct4 }, // ADST_DCT = 1 - { highbd_idct4, highbd_iadst4 }, // DCT_ADST = 2 + { vp9_highbd_idct4, vp9_highbd_idct4 }, // DCT_DCT = 0 + { highbd_iadst4, vp9_highbd_idct4 }, // ADST_DCT = 1 + { vp9_highbd_idct4, highbd_iadst4 }, // DCT_ADST = 2 { highbd_iadst4, highbd_iadst4 } // ADST_ADST = 3 }; uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); @@ -1844,9 +1814,9 @@ static void highbd_iadst8(const tran_low_t *input, tran_low_t *output, int bd) { } static const highbd_transform_2d HIGH_IHT_8[] = { - { 
highbd_idct8, highbd_idct8 }, // DCT_DCT = 0 - { highbd_iadst8, highbd_idct8 }, // ADST_DCT = 1 - { highbd_idct8, highbd_iadst8 }, // DCT_ADST = 2 + { vp9_highbd_idct8, vp9_highbd_idct8 }, // DCT_DCT = 0 + { highbd_iadst8, vp9_highbd_idct8 }, // ADST_DCT = 1 + { vp9_highbd_idct8, highbd_iadst8 }, // DCT_ADST = 2 { highbd_iadst8, highbd_iadst8 } // ADST_ADST = 3 }; @@ -1889,7 +1859,7 @@ void vp9_highbd_idct8x8_10_add_c(const tran_low_t *input, uint8_t *dest8, // First transform rows. // Only first 4 row has non-zero coefs. for (i = 0; i < 4; ++i) { - highbd_idct8(input, outptr, bd); + vp9_highbd_idct8(input, outptr, bd); input += 8; outptr += 8; } @@ -1897,7 +1867,7 @@ void vp9_highbd_idct8x8_10_add_c(const tran_low_t *input, uint8_t *dest8, for (i = 0; i < 8; ++i) { for (j = 0; j < 8; ++j) temp_in[j] = out[j * 8 + i]; - highbd_idct8(temp_in, temp_out, bd); + vp9_highbd_idct8(temp_in, temp_out, bd); for (j = 0; j < 8; ++j) { dest[j * stride + i] = highbd_clip_pixel_add( dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd); @@ -1905,7 +1875,7 @@ void vp9_highbd_idct8x8_10_add_c(const tran_low_t *input, uint8_t *dest8, } } -static void highbd_idct16(const tran_low_t *input, tran_low_t *output, int bd) { +void vp9_highbd_idct16(const tran_low_t *input, tran_low_t *output, int bd) { tran_low_t step1[16], step2[16]; tran_high_t temp1, temp2; (void) bd; @@ -2081,7 +2051,7 @@ void vp9_highbd_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest8, // First transform rows. 
for (i = 0; i < 16; ++i) { - highbd_idct16(input, outptr, bd); + vp9_highbd_idct16(input, outptr, bd); input += 16; outptr += 16; } @@ -2090,7 +2060,7 @@ void vp9_highbd_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest8, for (i = 0; i < 16; ++i) { for (j = 0; j < 16; ++j) temp_in[j] = out[j * 16 + i]; - highbd_idct16(temp_in, temp_out, bd); + vp9_highbd_idct16(temp_in, temp_out, bd); for (j = 0; j < 16; ++j) { dest[j * stride + i] = highbd_clip_pixel_add( dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); @@ -2270,9 +2240,9 @@ static void highbd_iadst16(const tran_low_t *input, tran_low_t *output, } static const highbd_transform_2d HIGH_IHT_16[] = { - { highbd_idct16, highbd_idct16 }, // DCT_DCT = 0 - { highbd_iadst16, highbd_idct16 }, // ADST_DCT = 1 - { highbd_idct16, highbd_iadst16 }, // DCT_ADST = 2 + { vp9_highbd_idct16, vp9_highbd_idct16 }, // DCT_DCT = 0 + { highbd_iadst16, vp9_highbd_idct16 }, // ADST_DCT = 1 + { vp9_highbd_idct16, highbd_iadst16 }, // DCT_ADST = 2 { highbd_iadst16, highbd_iadst16 } // ADST_ADST = 3 }; @@ -2315,7 +2285,7 @@ void vp9_highbd_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest8, // First transform rows. Since all non-zero dct coefficients are in // upper-left 4x4 area, we only need to calculate first 4 rows here. 
for (i = 0; i < 4; ++i) { - highbd_idct16(input, outptr, bd); + vp9_highbd_idct16(input, outptr, bd); input += 16; outptr += 16; } @@ -2324,7 +2294,7 @@ void vp9_highbd_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest8, for (i = 0; i < 16; ++i) { for (j = 0; j < 16; ++j) temp_in[j] = out[j*16 + i]; - highbd_idct16(temp_in, temp_out, bd); + vp9_highbd_idct16(temp_in, temp_out, bd); for (j = 0; j < 16; ++j) { dest[j * stride + i] = highbd_clip_pixel_add( dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); diff --git a/vp9/common/vp9_idct.h b/vp9/common/vp9_idct.h index fcabaa34f..1d8836cf3 100644 --- a/vp9/common/vp9_idct.h +++ b/vp9/common/vp9_idct.h @@ -118,6 +118,28 @@ typedef struct { } highbd_transform_2d; #endif // CONFIG_VP9_HIGHBITDEPTH +#if CONFIG_EMULATE_HARDWARE +// When CONFIG_EMULATE_HARDWARE is 1 the transform performs a +// non-normative method to handle overflows. A stream that causes +// overflows in the inverse transform is considered invalid in VP9, +// and a hardware implementer is free to choose any reasonable +// method to handle overflows. However to aid in hardware +// verification they can use a specific implementation of the +// WRAPLOW() macro below that is identical to their intended +// hardware implementation (and also use configure options to trigger +// the C-implementation of the transform). +// +// The particular WRAPLOW implementation below performs strict +// overflow wrapping to match common hardware implementations. 
+// bd of 8 uses trans_low with 16bits, need to remove 16bits +// bd of 10 uses trans_low with 18bits, need to remove 14bits +// bd of 12 uses trans_low with 20bits, need to remove 12bits +// bd of x uses trans_low with 8+x bits, need to remove 24-x bits +#define WRAPLOW(x, bd) ((((int32_t)(x)) << (24 - bd)) >> (24 - bd)) +#else +#define WRAPLOW(x, bd) (x) +#endif // CONFIG_EMULATE_HARDWARE + void vp9_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride, int eob); void vp9_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride, @@ -137,6 +159,9 @@ void vp9_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest, int stride, int eob); #if CONFIG_VP9_HIGHBITDEPTH +void vp9_highbd_idct4(const tran_low_t *input, tran_low_t *output, int bd); +void vp9_highbd_idct8(const tran_low_t *input, tran_low_t *output, int bd); +void vp9_highbd_idct16(const tran_low_t *input, tran_low_t *output, int bd); void vp9_highbd_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride, int eob, int bd); void vp9_highbd_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride, @@ -153,6 +178,11 @@ void vp9_highbd_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest, int stride, int eob, int bd); void vp9_highbd_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest, int stride, int eob, int bd); +static INLINE uint16_t highbd_clip_pixel_add(uint16_t dest, tran_high_t trans, + int bd) { + trans = WRAPLOW(trans, bd); + return clip_pixel_highbd(WRAPLOW(dest + trans, bd), bd); +} #endif // CONFIG_VP9_HIGHBITDEPTH #ifdef __cplusplus } // extern "C" diff --git a/vp9/encoder/vp9_dct.c b/vp9/encoder/vp9_dct.c index e61c09b6b..020a95196 100644 --- a/vp9/encoder/vp9_dct.c +++ b/vp9/encoder/vp9_dct.c @@ -17,6 +17,7 @@ #include "vp9/common/vp9_blockd.h" #include "vp9/common/vp9_idct.h" #include "vp9/common/vp9_systemdependent.h" +#include "vp9/encoder/vp9_dct.h" static INLINE tran_high_t fdct_round_shift(tran_high_t input) { 
tran_high_t rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS); @@ -26,7 +27,7 @@ static INLINE tran_high_t fdct_round_shift(tran_high_t input) { return rv; } -static void fdct4(const tran_low_t *input, tran_low_t *output) { +void vp9_fdct4(const tran_low_t *input, tran_low_t *output) { tran_high_t step[4]; tran_high_t temp1, temp2; @@ -123,7 +124,7 @@ void vp9_fdct4x4_c(const int16_t *input, tran_low_t *output, int stride) { } } -static void fadst4(const tran_low_t *input, tran_low_t *output) { +void vp9_fadst4(const tran_low_t *input, tran_low_t *output) { tran_high_t x0, x1, x2, x3; tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; @@ -163,13 +164,6 @@ static void fadst4(const tran_low_t *input, tran_low_t *output) { output[3] = (tran_low_t)fdct_round_shift(s3); } -static const transform_2d FHT_4[] = { - { fdct4, fdct4 }, // DCT_DCT = 0 - { fadst4, fdct4 }, // ADST_DCT = 1 - { fdct4, fadst4 }, // DCT_ADST = 2 - { fadst4, fadst4 } // ADST_ADST = 3 -}; - void vp9_fht4x4_c(const int16_t *input, tran_low_t *output, int stride, int tx_type) { if (tx_type == DCT_DCT) { @@ -203,7 +197,7 @@ void vp9_fht4x4_c(const int16_t *input, tran_low_t *output, } } -static void fdct8(const tran_low_t *input, tran_low_t *output) { +void vp9_fdct8(const tran_low_t *input, tran_low_t *output) { tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; // canbe16 tran_high_t t0, t1, t2, t3; // needs32 tran_high_t x0, x1, x2, x3; // canbe16 @@ -331,7 +325,7 @@ void vp9_fdct8x8_c(const int16_t *input, tran_low_t *final_output, int stride) { // Rows for (i = 0; i < 8; ++i) { - fdct8(&intermediate[i * 8], &final_output[i * 8]); + vp9_fdct8(&intermediate[i * 8], &final_output[i * 8]); for (j = 0; j < 8; ++j) final_output[j + i * 8] /= 2; } @@ -413,7 +407,7 @@ void vp9_fdct8x8_quant_c(const int16_t *input, int stride, // Rows for (i = 0; i < 8; ++i) { - fdct8(&intermediate[i * 8], &coeff_ptr[i * 8]); + vp9_fdct8(&intermediate[i * 8], &coeff_ptr[i * 8]); for (j = 0; j < 8; ++j) coeff_ptr[j + i * 8] /= 2; } @@ -641,7 
+635,7 @@ void vp9_fdct16x16_c(const int16_t *input, tran_low_t *output, int stride) { } } -static void fadst8(const tran_low_t *input, tran_low_t *output) { +void vp9_fadst8(const tran_low_t *input, tran_low_t *output) { tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; tran_high_t x0 = input[7]; @@ -712,13 +706,6 @@ static void fadst8(const tran_low_t *input, tran_low_t *output) { output[7] = (tran_low_t)-x1; } -static const transform_2d FHT_8[] = { - { fdct8, fdct8 }, // DCT_DCT = 0 - { fadst8, fdct8 }, // ADST_DCT = 1 - { fdct8, fadst8 }, // DCT_ADST = 2 - { fadst8, fadst8 } // ADST_ADST = 3 -}; - void vp9_fht8x8_c(const int16_t *input, tran_low_t *output, int stride, int tx_type) { if (tx_type == DCT_DCT) { @@ -807,7 +794,7 @@ void vp9_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride) { } // Rewrote to use same algorithm as others. -static void fdct16(const tran_low_t in[16], tran_low_t out[16]) { +void vp9_fdct16(const tran_low_t in[16], tran_low_t out[16]) { tran_high_t step1[8]; // canbe16 tran_high_t step2[8]; // canbe16 tran_high_t step3[8]; // canbe16 @@ -948,7 +935,7 @@ static void fdct16(const tran_low_t in[16], tran_low_t out[16]) { out[15] = (tran_low_t)fdct_round_shift(temp2); } -static void fadst16(const tran_low_t *input, tran_low_t *output) { +void vp9_fadst16(const tran_low_t *input, tran_low_t *output) { tran_high_t s0, s1, s2, s3, s4, s5, s6, s7, s8; tran_high_t s9, s10, s11, s12, s13, s14, s15; @@ -1111,13 +1098,6 @@ static void fadst16(const tran_low_t *input, tran_low_t *output) { output[15] = (tran_low_t)-x1; } -static const transform_2d FHT_16[] = { - { fdct16, fdct16 }, // DCT_DCT = 0 - { fadst16, fdct16 }, // ADST_DCT = 1 - { fdct16, fadst16 }, // DCT_ADST = 2 - { fadst16, fadst16 } // ADST_ADST = 3 -}; - void vp9_fht16x16_c(const int16_t *input, tran_low_t *output, int stride, int tx_type) { if (tx_type == DCT_DCT) { @@ -1162,7 +1142,7 @@ static INLINE tran_high_t half_round_shift(tran_high_t input) { return rv; } -static void 
fdct32(const tran_high_t *input, tran_high_t *output, int round) { +void vp9_fdct32(const tran_high_t *input, tran_high_t *output, int round) { tran_high_t step[32]; // Stage 1 step[0] = input[0] + input[(32 - 1)]; @@ -1505,7 +1485,7 @@ void vp9_fdct32x32_c(const int16_t *input, tran_low_t *out, int stride) { tran_high_t temp_in[32], temp_out[32]; for (j = 0; j < 32; ++j) temp_in[j] = input[j * stride + i] * 4; - fdct32(temp_in, temp_out, 0); + vp9_fdct32(temp_in, temp_out, 0); for (j = 0; j < 32; ++j) output[j * 32 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2; } @@ -1515,7 +1495,7 @@ void vp9_fdct32x32_c(const int16_t *input, tran_low_t *out, int stride) { tran_high_t temp_in[32], temp_out[32]; for (j = 0; j < 32; ++j) temp_in[j] = output[j + i * 32]; - fdct32(temp_in, temp_out, 0); + vp9_fdct32(temp_in, temp_out, 0); for (j = 0; j < 32; ++j) out[j + i * 32] = (tran_low_t)((temp_out[j] + 1 + (temp_out[j] < 0)) >> 2); @@ -1534,7 +1514,7 @@ void vp9_fdct32x32_rd_c(const int16_t *input, tran_low_t *out, int stride) { tran_high_t temp_in[32], temp_out[32]; for (j = 0; j < 32; ++j) temp_in[j] = input[j * stride + i] * 4; - fdct32(temp_in, temp_out, 0); + vp9_fdct32(temp_in, temp_out, 0); for (j = 0; j < 32; ++j) // TODO(cd): see quality impact of only doing // output[j * 32 + i] = (temp_out[j] + 1) >> 2; @@ -1547,7 +1527,7 @@ void vp9_fdct32x32_rd_c(const int16_t *input, tran_low_t *out, int stride) { tran_high_t temp_in[32], temp_out[32]; for (j = 0; j < 32; ++j) temp_in[j] = output[j + i * 32]; - fdct32(temp_in, temp_out, 1); + vp9_fdct32(temp_in, temp_out, 1); for (j = 0; j < 32; ++j) out[j + i * 32] = (tran_low_t)temp_out[j]; } diff --git a/vp9/encoder/vp9_dct.h b/vp9/encoder/vp9_dct.h new file mode 100644 index 000000000..49afcbbd5 --- /dev/null +++ b/vp9/encoder/vp9_dct.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VP9_ENCODER_VP9_DCT_H_ +#define VP9_ENCODER_VP9_DCT_H_ + +#include "vp9/common/vp9_idct.h" + +#ifdef __cplusplus +extern "C" { +#endif + +void vp9_highbd_fdct4x4_c(const int16_t *input, tran_low_t *output, int stride); +void vp9_highbd_fdct8x8_c(const int16_t *input, tran_low_t *output, int stride); +void vp9_highbd_fdct16x16_c(const int16_t *input, tran_low_t *output, + int stride); +void vp9_highbd_fdct32x32_c(const int16_t *input, tran_low_t *out, int stride); +void vp9_highbd_fdct32x32_rd_c(const int16_t *input, tran_low_t *out, + int stride); + +void vp9_fdct4(const tran_low_t *input, tran_low_t *output); +void vp9_fadst4(const tran_low_t *input, tran_low_t *output); +void vp9_fdct8(const tran_low_t *input, tran_low_t *output); +void vp9_fadst8(const tran_low_t *input, tran_low_t *output); +void vp9_fdct16(const tran_low_t in[16], tran_low_t out[16]); +void vp9_fadst16(const tran_low_t *input, tran_low_t *output); +void vp9_fdct32(const tran_high_t *input, tran_high_t *output, int round); + +static const transform_2d FHT_4[] = { + { vp9_fdct4, vp9_fdct4 }, // DCT_DCT = 0 + { vp9_fadst4, vp9_fdct4 }, // ADST_DCT = 1 + { vp9_fdct4, vp9_fadst4 }, // DCT_ADST = 2 + { vp9_fadst4, vp9_fadst4 } // ADST_ADST = 3 +}; + +static const transform_2d FHT_8[] = { + { vp9_fdct8, vp9_fdct8 }, // DCT_DCT = 0 + { vp9_fadst8, vp9_fdct8 }, // ADST_DCT = 1 + { vp9_fdct8, vp9_fadst8 }, // DCT_ADST = 2 + { vp9_fadst8, vp9_fadst8 } // ADST_ADST = 3 +}; + +static const transform_2d FHT_16[] = { + { vp9_fdct16, vp9_fdct16 }, // DCT_DCT = 0 + { vp9_fadst16, vp9_fdct16 }, // ADST_DCT = 1 + { vp9_fdct16, vp9_fadst16 }, // DCT_ADST = 2 + { 
vp9_fadst16, vp9_fadst16 } // ADST_ADST = 3 +}; + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_ENCODER_VP9_DCT_H_ diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk index 651b4c168..288360a84 100644 --- a/vp9/vp9cx.mk +++ b/vp9/vp9cx.mk @@ -24,6 +24,7 @@ VP9_CX_SRCS-yes += encoder/vp9_context_tree.h VP9_CX_SRCS-yes += encoder/vp9_cost.h VP9_CX_SRCS-yes += encoder/vp9_cost.c VP9_CX_SRCS-yes += encoder/vp9_dct.c +VP9_CX_SRCS-yes += encoder/vp9_dct.h VP9_CX_SRCS-$(CONFIG_VP9_TEMPORAL_DENOISING) += encoder/vp9_denoiser.c VP9_CX_SRCS-$(CONFIG_VP9_TEMPORAL_DENOISING) += encoder/vp9_denoiser.h VP9_CX_SRCS-yes += encoder/vp9_encodeframe.c -- 2.40.0