]> granicus.if.org Git - libvpx/commitdiff
Use saturated addition in SSSE3 of 32x32 quant
authorJingning Han <jingning@google.com>
Wed, 4 Sep 2013 19:03:28 +0000 (12:03 -0700)
committerJingning Han <jingning@google.com>
Thu, 5 Sep 2013 19:49:12 +0000 (12:49 -0700)
The 32x32 forward transform can potentially reach peak coefficient
value close to 32700, while the rounding factor can go upto 610.
This could cause overflow issue in the SSSE3 implementation of 32x32
quantization process.

This commit resolves this issue by replacing the addition operations
with saturated addition operations in 32x32 block quantization.

Change-Id: Id6b98996458e16c5b6241338ca113c332bef6e70

vp9/common/vp9_rtcd_defs.sh
vp9/encoder/vp9_quantize.c
vp9/encoder/x86/vp9_quantize_ssse3.asm

index 104db6aebdfec77e75e10cdaf8d473d9733bfaad..615e07ba9d348f6dd4fc671ff3cdc1281d8b4c2e 100644 (file)
@@ -701,7 +701,7 @@ prototype void vp9_quantize_b "int16_t *coeff_ptr, intptr_t n_coeffs, int skip_b
 specialize vp9_quantize_b $ssse3_x86_64
 
 prototype void vp9_quantize_b_32x32 "int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, int16_t *zbin_ptr, int16_t *round_ptr, int16_t *quant_ptr, int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"
-specialize vp9_quantize_b_32x32 # $ssse3_x86_64 FIXME(jingning): need a unit test on thisbefore enabled
+specialize vp9_quantize_b_32x32 $ssse3_x86_64
 
 #
 # Structured Similarity (SSIM)
index fb0e4707acfbf9fc8beb65eec1ee974b4fad61c2..96abeff388c70c8e2064e1444a508df9b239fcec 100644 (file)
@@ -135,6 +135,7 @@ void vp9_quantize_b_32x32_c(int16_t *coeff_ptr, intptr_t n_coeffs,
 
       if (x >= zbin) {
         x += ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1);
+        x  = clamp(x, INT16_MIN, INT16_MAX);
         y  = (((int)(((int)(x * quant_ptr[rc != 0]) >> 16) + x)) *
               quant_shift_ptr[rc != 0]) >> 15;      // quantize (x)
 
index 7deb9815a3094aa3d98235e9f0eebeb797c2aa84..ae0d6cd3c562eb49801f9f4f2f9d3747ad8f804f 100644 (file)
@@ -70,9 +70,15 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
   pcmpgtw                         m7, m6, m0               ; m7 = c[i] >= zbin
   punpckhqdq                      m0, m0
   pcmpgtw                        m12, m11, m0              ; m12 = c[i] >= zbin
+%ifidn %1, b_32x32
+  paddsw                          m6, m1
+  punpckhqdq                      m1, m1
+  paddsw                         m11, m1
+%else
   paddw                           m6, m1                   ; m6 += round
   punpckhqdq                      m1, m1
   paddw                          m11, m1                   ; m11 += round
+%endif
   pmulhw                          m8, m6, m2               ; m8 = m6*q>>16
   punpckhqdq                      m2, m2
   pmulhw                         m13, m11, m2              ; m13 = m11*q>>16
@@ -126,9 +132,12 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
   pmovmskb                        r2, m12
   or                              r6, r2
   jz .skip_iter
-%endif
+  paddsw                          m6, m1
+  paddsw                         m11, m1
+%else
   paddw                           m6, m1                   ; m6 += round
   paddw                          m11, m1                   ; m11 += round
+%endif
   pmulhw                         m14, m6, m2               ; m14 = m6*q>>16
   pmulhw                         m13, m11, m2              ; m13 = m11*q>>16
   paddw                          m14, m6                   ; m14 += m6