From 08d2f548007fd8d6fd41da8ef7fdb488b6485af3 Mon Sep 17 00:00:00 2001
From: Peter de Rivaz <peter.derivaz@gmail.com>
Date: Mon, 10 Nov 2014 16:17:49 +0000
Subject: [PATCH] Fixed idct16x16_10 highbitdepth transform

When the only non-zero coefficients are in the
first 4x4 block, a special routine is called.
The high-bitdepth optimized version of this routine
examined the wrong positions when deciding whether
to call an assembler or a C inverse transform.

Change-Id: I62da663ca11775dadb66e402e42f4a1cb1927893
---
 vp9/common/x86/vp9_idct_intrin_sse2.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/vp9/common/x86/vp9_idct_intrin_sse2.c b/vp9/common/x86/vp9_idct_intrin_sse2.c
index 60663f3be..c5406b4cc 100644
--- a/vp9/common/x86/vp9_idct_intrin_sse2.c
+++ b/vp9/common/x86/vp9_idct_intrin_sse2.c
@@ -4463,11 +4463,11 @@ void vp9_highbd_idct16x16_10_add_sse2(const tran_low_t *input, uint8_t *dest8,
   // Find the min & max for the row transform
   // Since all non-zero dct coefficients are in upper-left 4x4 area,
   // we only need to consider first 4 rows here.
-  max_input = _mm_max_epi16(inptr[0], inptr[2]);
-  min_input = _mm_min_epi16(inptr[0], inptr[2]);
+  max_input = _mm_max_epi16(inptr[0], inptr[1]);
+  min_input = _mm_min_epi16(inptr[0], inptr[1]);
   for (i = 2; i < 4; i++) {
-    max_input = _mm_max_epi16(max_input, inptr[2*i]);
-    min_input = _mm_min_epi16(min_input, inptr[2*i]);
+    max_input = _mm_max_epi16(max_input, inptr[i]);
+    min_input = _mm_min_epi16(min_input, inptr[i]);
   }
   max_input = _mm_cmpgt_epi16(max_input, max);
   min_input = _mm_cmplt_epi16(min_input, min);
-- 
2.40.0