quantize 32x32: saturate dqcoeff on x86

author Johann <johann.koenig@duck.com>

Fri, 30 Nov 2018 23:42:57 +0000 (15:42 -0800)

committer Johann <johann.koenig@duck.com>

Sat, 1 Dec 2018 00:27:14 +0000 (16:27 -0800)
author Johann <johann.koenig@duck.com>
Fri, 30 Nov 2018 23:42:57 +0000 (15:42 -0800)
committer Johann <johann.koenig@duck.com>
Sat, 1 Dec 2018 00:27:14 +0000 (16:27 -0800)
diff --git a/test/vp9_quantize_test.cc b/test/vp9_quantize_test.cc

index fc648e8cc8779ea3e1f696f270c9cb2e6eeb90a1..c623bfb5b15f28f9355edf4a94344e6fce25a14c 100644 (file)
--- a/test/vp9_quantize_test.cc
+++ b/test/vp9_quantize_test.cc
@@ -496,7 +496,6 @@ INSTANTIATE_TEST_CASE_P(
  #endif  // HAVE_SSE2
  
  #if HAVE_SSSE3
-#if CONFIG_VP9_HIGHBITDEPTH
  #if ARCH_X86_64
  INSTANTIATE_TEST_CASE_P(
      SSSE3, VP9QuantizeTest,
@@ -521,35 +520,9 @@ INSTANTIATE_TEST_CASE_P(
                                   false)));
  
  #endif  // ARCH_X86_64
-#else
-#if ARCH_X86_64
-INSTANTIATE_TEST_CASE_P(
-    SSSE3, VP9QuantizeTest,
-    ::testing::Values(make_tuple(&vpx_quantize_b_ssse3, &vpx_quantize_b_c,
-                                 VPX_BITS_8, 16, false),
-                      make_tuple(&QuantFPWrapper<vp9_quantize_fp_ssse3>,
-                                 &QuantFPWrapper<quantize_fp_nz_c>, VPX_BITS_8,
-                                 16, true),
-                      make_tuple(&QuantFPWrapper<vp9_quantize_fp_32x32_ssse3>,
-                                 &QuantFPWrapper<quantize_fp_32x32_nz_c>,
-                                 VPX_BITS_8, 32, true)));
-
-#else
-INSTANTIATE_TEST_CASE_P(SSSE3, VP9QuantizeTest,
-                        ::testing::Values(make_tuple(&vpx_quantize_b_ssse3,
-                                                     &vpx_quantize_b_c,
-                                                     VPX_BITS_8, 16, false)));
-#endif  // ARCH_X86_64
-// TODO(webm:1448): lowbd truncates results in C.
-INSTANTIATE_TEST_CASE_P(DISABLED_SSSE3, VP9QuantizeTest,
-                        ::testing::Values(make_tuple(
-                            &vpx_quantize_b_32x32_ssse3,
-                            &vpx_quantize_b_32x32_c, VPX_BITS_8, 32, false)));
-#endif  // CONFIG_VP9_HIGHBITDEPTH
  #endif  // HAVE_SSSE3
  
  #if HAVE_AVX
-#if CONFIG_VP9_HIGHBITDEPTH
  INSTANTIATE_TEST_CASE_P(AVX, VP9QuantizeTest,
                          ::testing::Values(make_tuple(&vpx_quantize_b_avx,
                                                       &vpx_quantize_b_c,
@@ -557,17 +530,6 @@ INSTANTIATE_TEST_CASE_P(AVX, VP9QuantizeTest,
                                            make_tuple(&vpx_quantize_b_32x32_avx,
                                                       &vpx_quantize_b_32x32_c,
                                                       VPX_BITS_8, 32, false)));
-#else
-INSTANTIATE_TEST_CASE_P(AVX, VP9QuantizeTest,
-                        ::testing::Values(make_tuple(&vpx_quantize_b_avx,
-                                                     &vpx_quantize_b_c,
-                                                     VPX_BITS_8, 16, false)));
-// TODO(webm:1448): lowbd truncates results in C.
-INSTANTIATE_TEST_CASE_P(DISABLED_AVX, VP9QuantizeTest,
-                        ::testing::Values(make_tuple(&vpx_quantize_b_32x32_avx,
-                                                     &vpx_quantize_b_32x32_c,
-                                                     VPX_BITS_8, 32, false)));
-#endif  // CONFIG_VP9_HIGHBITDEPTH
  #endif  // HAVE_AVX
  
  #if ARCH_X86_64 && HAVE_AVX2
@@ -576,7 +538,7 @@ INSTANTIATE_TEST_CASE_P(
      ::testing::Values(make_tuple(&QuantFPWrapper<vp9_quantize_fp_avx2>,
                                   &QuantFPWrapper<quantize_fp_nz_c>, VPX_BITS_8,
                                   16, true)));
-#endif  // HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH
+#endif  // ARCH_X86_64 && HAVE_AVX2
  
  // TODO(webm:1448): dqcoeff is not handled correctly in HBD builds.
  #if HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH
diff --git a/vpx_dsp/quantize.c b/vpx_dsp/quantize.c

index 82a65959236badccd9998267f5633a78fd4d3048..0e6a0b83facb19d51984d19c4e170e0b05763f74 100644 (file)
--- a/vpx_dsp/quantize.c
+++ b/vpx_dsp/quantize.c
@@ -12,6 +12,7 @@
  
  #include "./vpx_dsp_rtcd.h"
  #include "vpx_dsp/quantize.h"
+#include "vpx_dsp/vpx_dsp_common.h"
  #include "vpx_mem/vpx_mem.h"
  
  void vpx_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs, int skip_block,
@@ -259,7 +260,15 @@ void vpx_quantize_b_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
            15;
  
      qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
+#if (ARCH_X86 || ARCH_X86_64) && !CONFIG_VP9_HIGHBITDEPTH
+    // When tran_low_t is only 16 bits, dqcoeff can exceed its range. Rather
+    // than truncating with a cast, saturate the value. This is easier to
+    // implement on x86 and preserves the sign of the value.
+    dqcoeff_ptr[rc] =
+        clamp(qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2, INT16_MIN, INT16_MAX);
+#else
      dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2;
+#endif  // (ARCH_X86 || ARCH_X86_64) && !CONFIG_VP9_HIGHBITDEPTH
  
      if (tmp) eob = idx_arr[i];
    }
diff --git a/vpx_dsp/x86/quantize_ssse3.h b/vpx_dsp/x86/quantize_ssse3.h

index 35223d7b448b874352c212a96f97f59093c8757d..e8d2a057710b17f095b943e023c3f5bd10bda081 100644 (file)
--- a/vpx_dsp/x86/quantize_ssse3.h
+++ b/vpx_dsp/x86/quantize_ssse3.h
@@ -24,7 +24,6 @@ static INLINE void calculate_dqcoeff_and_store_32x32(const __m128i qcoeff,
    // Un-sign to bias rounding like C.
    const __m128i coeff = _mm_abs_epi16(qcoeff);
  
-#if CONFIG_VP9_HIGHBITDEPTH
    const __m128i sign_0 = _mm_unpacklo_epi16(zero, qcoeff);
    const __m128i sign_1 = _mm_unpackhi_epi16(zero, qcoeff);
  
@@ -40,17 +39,12 @@ static INLINE void calculate_dqcoeff_and_store_32x32(const __m128i qcoeff,
    dqcoeff32_0 = _mm_sign_epi32(dqcoeff32_0, sign_0);
    dqcoeff32_1 = _mm_sign_epi32(dqcoeff32_1, sign_1);
  
+#if CONFIG_VP9_HIGHBITDEPTH
    _mm_store_si128((__m128i *)(dqcoeff), dqcoeff32_0);
    _mm_store_si128((__m128i *)(dqcoeff + 4), dqcoeff32_1);
  #else
-  __m128i dqcoeff16 = _mm_mullo_epi16(coeff, dequant);
-  (void)zero;
-
-  dqcoeff16 = _mm_srli_epi16(dqcoeff16, 1);
-
-  dqcoeff16 = _mm_sign_epi16(dqcoeff16, qcoeff);
-
-  _mm_store_si128((__m128i *)(dqcoeff), dqcoeff16);
+  _mm_store_si128((__m128i *)(dqcoeff),
+                  _mm_packs_epi32(dqcoeff32_0, dqcoeff32_1));
  #endif  // CONFIG_VP9_HIGHBITDEPTH
  }
author	Johann <johann.koenig@duck.com>
	Fri, 30 Nov 2018 23:42:57 +0000 (15:42 -0800)
committer	Johann <johann.koenig@duck.com>
	Sat, 1 Dec 2018 00:27:14 +0000 (16:27 -0800)
test/vp9_quantize_test.cc		patch \| blob \| history
vpx_dsp/quantize.c		patch \| blob \| history
vpx_dsp/x86/quantize_ssse3.h		patch \| blob \| history