From 5780c4cbd592c92da567609f737dbf823b055cd6 Mon Sep 17 00:00:00 2001 From: Scott LaVarnway Date: Mon, 4 Feb 2013 16:49:17 -0800 Subject: [PATCH] Added vp9_short_idct1_32x32_c and called this function in vp9_dequant_idct_add_32x32_c when eob == 1. For the test clip used, the decoder performance improved by 21+%. Based on Yaowu's 16 point idct work. Change-Id: Ib579a90fed531d45777980e04bf0c9b23c093c43 --- vp9/common/vp9_idctllm.c | 10 ++++++++++ vp9/common/vp9_rtcd_defs.sh | 3 +++ vp9/decoder/vp9_dequantize.c | 23 ++++++++++++++++------- 3 files changed, 29 insertions(+), 7 deletions(-) diff --git a/vp9/common/vp9_idctllm.c b/vp9/common/vp9_idctllm.c index 7dd2776f6..85f8fd7db 100644 --- a/vp9/common/vp9_idctllm.c +++ b/vp9/common/vp9_idctllm.c @@ -1644,6 +1644,16 @@ void vp9_short_idct32x32_c(int16_t *input, int16_t *output, int pitch) { } } +void vp9_short_idct1_32x32_c(int16_t *input, int16_t *output) { + int tmp; + int16_t out; + tmp = input[0] * cospi_16_64; + out = dct_const_round_shift(tmp); + tmp = out * cospi_16_64; + out = dct_const_round_shift(tmp); + *output = (out + 32) >> 6; +} + #else // !CONFIG_DWTDCTHYBRID #if DWT_TYPE == 53 diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh index 7822ee857..8f66e06c8 100644 --- a/vp9/common/vp9_rtcd_defs.sh +++ b/vp9/common/vp9_rtcd_defs.sh @@ -408,6 +408,9 @@ specialize vp9_short_idct1_16x16 prototype void vp9_short_idct32x32 "int16_t *input, int16_t *output, int pitch" specialize vp9_short_idct32x32 +prototype void vp9_short_idct1_32x32 "int16_t *input, int16_t *output" +specialize vp9_short_idct1_32x32 + prototype void vp9_ihtllm "const int16_t *input, int16_t *output, int pitch, int tx_type, int tx_dim, int16_t eobs" specialize vp9_ihtllm diff --git a/vp9/decoder/vp9_dequantize.c b/vp9/decoder/vp9_dequantize.c index e46be3ac7..18d4e59c7 100644 --- a/vp9/decoder/vp9_dequantize.c +++ b/vp9/decoder/vp9_dequantize.c @@ -349,13 +349,22 @@ void vp9_dequant_idct_add_32x32_c(int16_t *input, const int16_t *dq, int i; if (eob) { - input[0]= input[0] * dq[0] / 2; - for (i = 1; i < 1024; i++) - input[i] = input[i] * dq[1] / 2; - vp9_short_idct32x32_c(input, output, 64); - vpx_memset(input, 0, 2048); - - add_residual(output, pred, pitch, dest, stride, 32, 32); + input[0] = input[0] * dq[0] / 2; +#if !CONFIG_DWTDCTHYBRID + if (eob == 1) { + vp9_short_idct1_32x32_c(input, output); + add_constant_residual(output[0], pred, pitch, dest, stride, 32, 32); + input[0] = 0; + } else { +#endif + for (i = 1; i < 1024; i++) + input[i] = input[i] * dq[1] / 2; + vp9_short_idct32x32_c(input, output, 64); + vpx_memset(input, 0, 2048); + add_residual(output, pred, pitch, dest, stride, 32, 32); +#if !CONFIG_DWTDCTHYBRID + } +#endif } } -- 2.40.0