From 08d2f548007fd8d6fd41da8ef7fdb488b6485af3 Mon Sep 17 00:00:00 2001 From: Peter de Rivaz Date: Mon, 10 Nov 2014 16:17:49 +0000 Subject: [PATCH] Fixed idct16x16_10 highbitdepth transform When all non-zero coefficients lie within the first 4x4 block, a special routine is called. The highbitdepth optimized version of this routine examined the wrong positions when deciding whether to call an assembler or C inverse transform. Change-Id: I62da663ca11775dadb66e402e42f4a1cb1927893 --- vp9/common/x86/vp9_idct_intrin_sse2.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/vp9/common/x86/vp9_idct_intrin_sse2.c b/vp9/common/x86/vp9_idct_intrin_sse2.c index 60663f3be..c5406b4cc 100644 --- a/vp9/common/x86/vp9_idct_intrin_sse2.c +++ b/vp9/common/x86/vp9_idct_intrin_sse2.c @@ -4463,11 +4463,11 @@ void vp9_highbd_idct16x16_10_add_sse2(const tran_low_t *input, uint8_t *dest8, // Find the min & max for the row transform // Since all non-zero dct coefficients are in upper-left 4x4 area, // we only need to consider first 4 rows here. - max_input = _mm_max_epi16(inptr[0], inptr[2]); - min_input = _mm_min_epi16(inptr[0], inptr[2]); + max_input = _mm_max_epi16(inptr[0], inptr[1]); + min_input = _mm_min_epi16(inptr[0], inptr[1]); for (i = 2; i < 4; i++) { - max_input = _mm_max_epi16(max_input, inptr[2*i]); - min_input = _mm_min_epi16(min_input, inptr[2*i]); + max_input = _mm_max_epi16(max_input, inptr[i]); + min_input = _mm_min_epi16(min_input, inptr[i]); } max_input = _mm_cmpgt_epi16(max_input, max); min_input = _mm_cmplt_epi16(min_input, min); -- 2.40.0