From d79850e60c2418cceb6cc34f582f37d0736a871a Mon Sep 17 00:00:00 2001
From: Peter de Rivaz
Date: Mon, 23 Nov 2015 19:39:37 +0000
Subject: [PATCH] Bug fix for high bitdepth using flipped transforms.

Fixes mismatch and performance drop.

Change-Id: Ib99711eb3b78257a8105073e2b6d7031459357bb
---
 vp9/common/vp9_idct.c | 60 ++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 54 insertions(+), 6 deletions(-)

diff --git a/vp9/common/vp9_idct.c b/vp9/common/vp9_idct.c
index a9431f4da..a6027a9a9 100644
--- a/vp9/common/vp9_idct.c
+++ b/vp9/common/vp9_idct.c
@@ -69,6 +69,57 @@ static void maybe_flip_strides(uint8_t **dst, int *dstride,
   }
 }
 
+#if CONFIG_VP9_HIGHBITDEPTH
+
+static void maybe_flip_strides16(uint16_t **dst, int *dstride,
+                                 tran_low_t **src, int *sstride,
+                                 int tx_type, int size) {
+  // High-bitdepth (uint16_t dst) counterpart of maybe_flip_strides(). The
+  // transpose of src is added to dst, so an LR flip of the addends (in dst
+  // coordinates) is a UD flip of src; a UD flip of the addends flips dst.
+  switch (tx_type) {
+    case DCT_DCT:
+    case ADST_DCT:
+    case DCT_ADST:
+    case ADST_ADST:
+      break;
+    case FLIPADST_DCT:
+    case FLIPADST_ADST:
+      // UD flip of the addends: flip dst
+      FLIPUD_PTR(*dst, *dstride, size);
+      break;
+    case DCT_FLIPADST:
+    case ADST_FLIPADST:
+      // LR flip of the addends: UD flip src (it is added transposed)
+      FLIPUD_PTR(*src, *sstride, size);
+      break;
+    case FLIPADST_FLIPADST:
+      // UD flip of the addends: flip dst
+      FLIPUD_PTR(*dst, *dstride, size);
+      // LR flip of the addends: UD flip src (it is added transposed)
+      FLIPUD_PTR(*src, *sstride, size);
+      break;
+    case DST_DST:
+    case DCT_DST:
+    case DST_DCT:
+    case DST_ADST:
+    case ADST_DST:
+      break;
+    case DST_FLIPADST:
+      // LR flip of the addends: UD flip src (it is added transposed)
+      FLIPUD_PTR(*src, *sstride, size);
+      break;
+    case FLIPADST_DST:
+      // UD flip of the addends: flip dst
+      FLIPUD_PTR(*dst, *dstride, size);
+      break;
+    default:
+      assert(0);
+      break;
+  }
+}
+#endif
+
 void idst4(const tran_low_t *input, tran_low_t *output) {
   // {sin(pi/5), sin(pi*2/5)} * sqrt(2/5) * sqrt(2)
   static const int32_t sinvalue_lookup[] = {
@@ -2795,8 +2846,7 @@ void vp9_highbd_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,
   }
 
 #if CONFIG_EXT_TX
-  maybe_flip_strides((uint8_t**)&dest,
-                     &stride, &outp, &outstride, tx_type, 4 * 2);
+  maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, 4);
 #endif
 
   // Sum with the destination
@@ -2939,8 +2989,7 @@ void vp9_highbd_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest8,
   }
 
 #if CONFIG_EXT_TX
-  maybe_flip_strides((uint8_t**)&dest,
-                     &stride, &outp, &outstride, tx_type, 8 * 2);
+  maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, 8);
 #endif
 
   // Sum with the destination
@@ -3397,8 +3446,7 @@ void vp9_highbd_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest8,
   }
 
 #if CONFIG_EXT_TX
-  maybe_flip_strides((uint8_t**)&dest, &stride,
-                     &outp, &outstride, tx_type, 16 * 2);
+  maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, 16);
 #endif
 
   // Sum with the destination
-- 
2.40.0