From: Johann Date: Fri, 30 Nov 2018 23:42:57 +0000 (-0800) Subject: quantize 32x32: saturate dqcoeff on x86 X-Git-Tag: v1.8.0~112 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=5fbc7a286b4d72883392fdbb10ec52bace662f66;p=libvpx quantize 32x32: saturate dqcoeff on x86 This slows down low bitdepth builds but is necessary to obtain correct values. BUG=webm:1448 Change-Id: I4ca9145f576089bb8496fcfeedeb556dc8fe6574 --- diff --git a/test/vp9_quantize_test.cc b/test/vp9_quantize_test.cc index fc648e8cc..c623bfb5b 100644 --- a/test/vp9_quantize_test.cc +++ b/test/vp9_quantize_test.cc @@ -496,7 +496,6 @@ INSTANTIATE_TEST_CASE_P( #endif // HAVE_SSE2 #if HAVE_SSSE3 -#if CONFIG_VP9_HIGHBITDEPTH #if ARCH_X86_64 INSTANTIATE_TEST_CASE_P( SSSE3, VP9QuantizeTest, @@ -521,35 +520,9 @@ INSTANTIATE_TEST_CASE_P( false))); #endif // ARCH_X86_64 -#else -#if ARCH_X86_64 -INSTANTIATE_TEST_CASE_P( - SSSE3, VP9QuantizeTest, - ::testing::Values(make_tuple(&vpx_quantize_b_ssse3, &vpx_quantize_b_c, - VPX_BITS_8, 16, false), - make_tuple(&QuantFPWrapper, - &QuantFPWrapper, VPX_BITS_8, - 16, true), - make_tuple(&QuantFPWrapper, - &QuantFPWrapper, - VPX_BITS_8, 32, true))); - -#else -INSTANTIATE_TEST_CASE_P(SSSE3, VP9QuantizeTest, - ::testing::Values(make_tuple(&vpx_quantize_b_ssse3, - &vpx_quantize_b_c, - VPX_BITS_8, 16, false))); -#endif // ARCH_X86_64 -// TODO(webm:1448): lowbd truncates results in C. 
-INSTANTIATE_TEST_CASE_P(DISABLED_SSSE3, VP9QuantizeTest, - ::testing::Values(make_tuple( - &vpx_quantize_b_32x32_ssse3, - &vpx_quantize_b_32x32_c, VPX_BITS_8, 32, false))); -#endif // CONFIG_VP9_HIGHBITDEPTH #endif // HAVE_SSSE3 #if HAVE_AVX -#if CONFIG_VP9_HIGHBITDEPTH INSTANTIATE_TEST_CASE_P(AVX, VP9QuantizeTest, ::testing::Values(make_tuple(&vpx_quantize_b_avx, &vpx_quantize_b_c, @@ -557,17 +530,6 @@ INSTANTIATE_TEST_CASE_P(AVX, VP9QuantizeTest, make_tuple(&vpx_quantize_b_32x32_avx, &vpx_quantize_b_32x32_c, VPX_BITS_8, 32, false))); -#else -INSTANTIATE_TEST_CASE_P(AVX, VP9QuantizeTest, - ::testing::Values(make_tuple(&vpx_quantize_b_avx, - &vpx_quantize_b_c, - VPX_BITS_8, 16, false))); -// TODO(webm:1448): lowbd truncates results in C. -INSTANTIATE_TEST_CASE_P(DISABLED_AVX, VP9QuantizeTest, - ::testing::Values(make_tuple(&vpx_quantize_b_32x32_avx, - &vpx_quantize_b_32x32_c, - VPX_BITS_8, 32, false))); -#endif // CONFIG_VP9_HIGHBITDEPTH #endif // HAVE_AVX #if ARCH_X86_64 && HAVE_AVX2 @@ -576,7 +538,7 @@ INSTANTIATE_TEST_CASE_P( ::testing::Values(make_tuple(&QuantFPWrapper, &QuantFPWrapper, VPX_BITS_8, 16, true))); -#endif // HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH +#endif // HAVE_AVX2 // TODO(webm:1448): dqcoeff is not handled correctly in HBD builds. #if HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH diff --git a/vpx_dsp/quantize.c b/vpx_dsp/quantize.c index 82a659592..0e6a0b83f 100644 --- a/vpx_dsp/quantize.c +++ b/vpx_dsp/quantize.c @@ -12,6 +12,7 @@ #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/quantize.h" +#include "vpx_dsp/vpx_dsp_common.h" #include "vpx_mem/vpx_mem.h" void vpx_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs, int skip_block, @@ -259,7 +260,15 @@ void vpx_quantize_b_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, 15; qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign; +#if (ARCH_X86 || ARCH_X86_64) && !CONFIG_VP9_HIGHBITDEPTH + // When tran_low_t is only 16 bits dqcoeff can outrange it. 
Rather than + // truncating with a cast, saturate the value. This is easier to implement + // on x86 and preserves the sign of the value. + dqcoeff_ptr[rc] = + clamp(qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2, INT16_MIN, INT16_MAX); +#else dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2; +#endif // (ARCH_X86 || ARCH_X86_64) && !CONFIG_VP9_HIGHBITDEPTH if (tmp) eob = idx_arr[i]; } diff --git a/vpx_dsp/x86/quantize_ssse3.h b/vpx_dsp/x86/quantize_ssse3.h index 35223d7b4..e8d2a0577 100644 --- a/vpx_dsp/x86/quantize_ssse3.h +++ b/vpx_dsp/x86/quantize_ssse3.h @@ -24,7 +24,6 @@ static INLINE void calculate_dqcoeff_and_store_32x32(const __m128i qcoeff, // Un-sign to bias rounding like C. const __m128i coeff = _mm_abs_epi16(qcoeff); -#if CONFIG_VP9_HIGHBITDEPTH const __m128i sign_0 = _mm_unpacklo_epi16(zero, qcoeff); const __m128i sign_1 = _mm_unpackhi_epi16(zero, qcoeff); @@ -40,17 +39,12 @@ static INLINE void calculate_dqcoeff_and_store_32x32(const __m128i qcoeff, dqcoeff32_0 = _mm_sign_epi32(dqcoeff32_0, sign_0); dqcoeff32_1 = _mm_sign_epi32(dqcoeff32_1, sign_1); +#if CONFIG_VP9_HIGHBITDEPTH _mm_store_si128((__m128i *)(dqcoeff), dqcoeff32_0); _mm_store_si128((__m128i *)(dqcoeff + 4), dqcoeff32_1); #else - __m128i dqcoeff16 = _mm_mullo_epi16(coeff, dequant); - (void)zero; - - dqcoeff16 = _mm_srli_epi16(dqcoeff16, 1); - - dqcoeff16 = _mm_sign_epi16(dqcoeff16, qcoeff); - - _mm_store_si128((__m128i *)(dqcoeff), dqcoeff16); + _mm_store_si128((__m128i *)(dqcoeff), + _mm_packs_epi32(dqcoeff32_0, dqcoeff32_1)); #endif // CONFIG_VP9_HIGHBITDEPTH }