granicus.if.org Git - libvpx/commitdiff
quantize 32x32: saturate dqcoeff on x86
author: Johann <johann.koenig@duck.com>
Fri, 30 Nov 2018 23:42:57 +0000 (15:42 -0800)
committer: Johann <johann.koenig@duck.com>
Sat, 1 Dec 2018 00:27:14 +0000 (16:27 -0800)
This slows down low bitdepth builds but is necessary to obtain correct
values.

BUG=webm:1448

Change-Id: I4ca9145f576089bb8496fcfeedeb556dc8fe6574

test/vp9_quantize_test.cc
vpx_dsp/quantize.c
vpx_dsp/x86/quantize_ssse3.h

index fc648e8cc8779ea3e1f696f270c9cb2e6eeb90a1..c623bfb5b15f28f9355edf4a94344e6fce25a14c 100644 (file)
@@ -496,7 +496,6 @@ INSTANTIATE_TEST_CASE_P(
 #endif  // HAVE_SSE2
 
 #if HAVE_SSSE3
-#if CONFIG_VP9_HIGHBITDEPTH
 #if ARCH_X86_64
 INSTANTIATE_TEST_CASE_P(
     SSSE3, VP9QuantizeTest,
@@ -521,35 +520,9 @@ INSTANTIATE_TEST_CASE_P(
                                  false)));
 
 #endif  // ARCH_X86_64
-#else
-#if ARCH_X86_64
-INSTANTIATE_TEST_CASE_P(
-    SSSE3, VP9QuantizeTest,
-    ::testing::Values(make_tuple(&vpx_quantize_b_ssse3, &vpx_quantize_b_c,
-                                 VPX_BITS_8, 16, false),
-                      make_tuple(&QuantFPWrapper<vp9_quantize_fp_ssse3>,
-                                 &QuantFPWrapper<quantize_fp_nz_c>, VPX_BITS_8,
-                                 16, true),
-                      make_tuple(&QuantFPWrapper<vp9_quantize_fp_32x32_ssse3>,
-                                 &QuantFPWrapper<quantize_fp_32x32_nz_c>,
-                                 VPX_BITS_8, 32, true)));
-
-#else
-INSTANTIATE_TEST_CASE_P(SSSE3, VP9QuantizeTest,
-                        ::testing::Values(make_tuple(&vpx_quantize_b_ssse3,
-                                                     &vpx_quantize_b_c,
-                                                     VPX_BITS_8, 16, false)));
-#endif  // ARCH_X86_64
-// TODO(webm:1448): lowbd truncates results in C.
-INSTANTIATE_TEST_CASE_P(DISABLED_SSSE3, VP9QuantizeTest,
-                        ::testing::Values(make_tuple(
-                            &vpx_quantize_b_32x32_ssse3,
-                            &vpx_quantize_b_32x32_c, VPX_BITS_8, 32, false)));
-#endif  // CONFIG_VP9_HIGHBITDEPTH
 #endif  // HAVE_SSSE3
 
 #if HAVE_AVX
-#if CONFIG_VP9_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(AVX, VP9QuantizeTest,
                         ::testing::Values(make_tuple(&vpx_quantize_b_avx,
                                                      &vpx_quantize_b_c,
@@ -557,17 +530,6 @@ INSTANTIATE_TEST_CASE_P(AVX, VP9QuantizeTest,
                                           make_tuple(&vpx_quantize_b_32x32_avx,
                                                      &vpx_quantize_b_32x32_c,
                                                      VPX_BITS_8, 32, false)));
-#else
-INSTANTIATE_TEST_CASE_P(AVX, VP9QuantizeTest,
-                        ::testing::Values(make_tuple(&vpx_quantize_b_avx,
-                                                     &vpx_quantize_b_c,
-                                                     VPX_BITS_8, 16, false)));
-// TODO(webm:1448): lowbd truncates results in C.
-INSTANTIATE_TEST_CASE_P(DISABLED_AVX, VP9QuantizeTest,
-                        ::testing::Values(make_tuple(&vpx_quantize_b_32x32_avx,
-                                                     &vpx_quantize_b_32x32_c,
-                                                     VPX_BITS_8, 32, false)));
-#endif  // CONFIG_VP9_HIGHBITDEPTH
 #endif  // HAVE_AVX
 
 #if ARCH_X86_64 && HAVE_AVX2
@@ -576,7 +538,7 @@ INSTANTIATE_TEST_CASE_P(
     ::testing::Values(make_tuple(&QuantFPWrapper<vp9_quantize_fp_avx2>,
                                  &QuantFPWrapper<quantize_fp_nz_c>, VPX_BITS_8,
                                  16, true)));
-#endif  // HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH
+#endif  // ARCH_X86_64 && HAVE_AVX2
 
 // TODO(webm:1448): dqcoeff is not handled correctly in HBD builds.
 #if HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH
index 82a65959236badccd9998267f5633a78fd4d3048..0e6a0b83facb19d51984d19c4e170e0b05763f74 100644 (file)
@@ -12,6 +12,7 @@
 
 #include "./vpx_dsp_rtcd.h"
 #include "vpx_dsp/quantize.h"
+#include "vpx_dsp/vpx_dsp_common.h"
 #include "vpx_mem/vpx_mem.h"
 
 void vpx_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs, int skip_block,
@@ -259,7 +260,15 @@ void vpx_quantize_b_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
           15;
 
     qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
+#if (ARCH_X86 || ARCH_X86_64) && !CONFIG_VP9_HIGHBITDEPTH
+    // When tran_low_t is only 16 bits, dqcoeff can exceed its range. Rather than
+    // truncating with a cast, saturate the value. This is easier to implement
+    // on x86 and preserves the sign of the value.
+    dqcoeff_ptr[rc] =
+        clamp(qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2, INT16_MIN, INT16_MAX);
+#else
     dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2;
+#endif  // (ARCH_X86 || ARCH_X86_64) && !CONFIG_VP9_HIGHBITDEPTH
 
     if (tmp) eob = idx_arr[i];
   }
index 35223d7b448b874352c212a96f97f59093c8757d..e8d2a057710b17f095b943e023c3f5bd10bda081 100644 (file)
@@ -24,7 +24,6 @@ static INLINE void calculate_dqcoeff_and_store_32x32(const __m128i qcoeff,
   // Un-sign to bias rounding like C.
   const __m128i coeff = _mm_abs_epi16(qcoeff);
 
-#if CONFIG_VP9_HIGHBITDEPTH
   const __m128i sign_0 = _mm_unpacklo_epi16(zero, qcoeff);
   const __m128i sign_1 = _mm_unpackhi_epi16(zero, qcoeff);
 
@@ -40,17 +39,12 @@ static INLINE void calculate_dqcoeff_and_store_32x32(const __m128i qcoeff,
   dqcoeff32_0 = _mm_sign_epi32(dqcoeff32_0, sign_0);
   dqcoeff32_1 = _mm_sign_epi32(dqcoeff32_1, sign_1);
 
+#if CONFIG_VP9_HIGHBITDEPTH
   _mm_store_si128((__m128i *)(dqcoeff), dqcoeff32_0);
   _mm_store_si128((__m128i *)(dqcoeff + 4), dqcoeff32_1);
 #else
-  __m128i dqcoeff16 = _mm_mullo_epi16(coeff, dequant);
-  (void)zero;
-
-  dqcoeff16 = _mm_srli_epi16(dqcoeff16, 1);
-
-  dqcoeff16 = _mm_sign_epi16(dqcoeff16, qcoeff);
-
-  _mm_store_si128((__m128i *)(dqcoeff), dqcoeff16);
+  _mm_store_si128((__m128i *)(dqcoeff),
+                  _mm_packs_epi32(dqcoeff32_0, dqcoeff32_1));
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 }