From c21d4370527e1c31c88b5268b7e0a4d99ae0c557 Mon Sep 17 00:00:00 2001
From: James Zern
Date: Sat, 2 Apr 2016 11:04:38 -0700
Subject: [PATCH] vpx_fdct32x32_1_msa: fix accumulator overflow

Change-Id: I33a5432eda3416382e1cea06b45082c0c65faa75
---
 vpx_dsp/mips/fwd_dct32x32_msa.c | 36 ++++++++++++++++-----------------
 1 file changed, 17 insertions(+), 19 deletions(-)

diff --git a/vpx_dsp/mips/fwd_dct32x32_msa.c b/vpx_dsp/mips/fwd_dct32x32_msa.c
index 2115a348c..f29c14b3d 100644
--- a/vpx_dsp/mips/fwd_dct32x32_msa.c
+++ b/vpx_dsp/mips/fwd_dct32x32_msa.c
@@ -933,23 +933,21 @@ void vpx_fdct32x32_rd_msa(const int16_t *input, int16_t *out,
 }
 
 void vpx_fdct32x32_1_msa(const int16_t *input, int16_t *out, int32_t stride) {
-  out[1] = 0;
-
-  out[0] = LD_HADD(input, stride);
-  out[0] += LD_HADD(input + 8, stride);
-  out[0] += LD_HADD(input + 16, stride);
-  out[0] += LD_HADD(input + 24, stride);
-  out[0] += LD_HADD(input + 32 * 8, stride);
-  out[0] += LD_HADD(input + 32 * 8 + 8, stride);
-  out[0] += LD_HADD(input + 32 * 8 + 16, stride);
-  out[0] += LD_HADD(input + 32 * 8 + 24, stride);
-  out[0] += LD_HADD(input + 32 * 16, stride);
-  out[0] += LD_HADD(input + 32 * 16 + 8, stride);
-  out[0] += LD_HADD(input + 32 * 16 + 16, stride);
-  out[0] += LD_HADD(input + 32 * 16 + 24, stride);
-  out[0] += LD_HADD(input + 32 * 24, stride);
-  out[0] += LD_HADD(input + 32 * 24 + 8, stride);
-  out[0] += LD_HADD(input + 32 * 24 + 16, stride);
-  out[0] += LD_HADD(input + 32 * 24 + 24, stride);
-  out[0] >>= 3;
+  int sum = LD_HADD(input, stride);
+  sum += LD_HADD(input + 8, stride);
+  sum += LD_HADD(input + 16, stride);
+  sum += LD_HADD(input + 24, stride);
+  sum += LD_HADD(input + 32 * 8, stride);
+  sum += LD_HADD(input + 32 * 8 + 8, stride);
+  sum += LD_HADD(input + 32 * 8 + 16, stride);
+  sum += LD_HADD(input + 32 * 8 + 24, stride);
+  sum += LD_HADD(input + 32 * 16, stride);
+  sum += LD_HADD(input + 32 * 16 + 8, stride);
+  sum += LD_HADD(input + 32 * 16 + 16, stride);
+  sum += LD_HADD(input + 32 * 16 + 24, stride);
+  sum += LD_HADD(input + 32 * 24, stride);
+  sum += LD_HADD(input + 32 * 24 + 8, stride);
+  sum += LD_HADD(input + 32 * 24 + 16, stride);
+  sum += LD_HADD(input + 32 * 24 + 24, stride);
+  out[0] = (int16_t)(sum >> 3);
 }
-- 
2.40.0