#endif // HAVE_SSE2
#if HAVE_SSSE3
-#if CONFIG_VP9_HIGHBITDEPTH
#if ARCH_X86_64
INSTANTIATE_TEST_CASE_P(
SSSE3, VP9QuantizeTest,
false)));
#endif // ARCH_X86_64
-#else
-#if ARCH_X86_64
-INSTANTIATE_TEST_CASE_P(
- SSSE3, VP9QuantizeTest,
- ::testing::Values(make_tuple(&vpx_quantize_b_ssse3, &vpx_quantize_b_c,
- VPX_BITS_8, 16, false),
- make_tuple(&QuantFPWrapper<vp9_quantize_fp_ssse3>,
- &QuantFPWrapper<quantize_fp_nz_c>, VPX_BITS_8,
- 16, true),
- make_tuple(&QuantFPWrapper<vp9_quantize_fp_32x32_ssse3>,
- &QuantFPWrapper<quantize_fp_32x32_nz_c>,
- VPX_BITS_8, 32, true)));
-
-#else
-INSTANTIATE_TEST_CASE_P(SSSE3, VP9QuantizeTest,
- ::testing::Values(make_tuple(&vpx_quantize_b_ssse3,
- &vpx_quantize_b_c,
- VPX_BITS_8, 16, false)));
-#endif // ARCH_X86_64
-// TODO(webm:1448): lowbd truncates results in C.
-INSTANTIATE_TEST_CASE_P(DISABLED_SSSE3, VP9QuantizeTest,
- ::testing::Values(make_tuple(
- &vpx_quantize_b_32x32_ssse3,
- &vpx_quantize_b_32x32_c, VPX_BITS_8, 32, false)));
-#endif // CONFIG_VP9_HIGHBITDEPTH
#endif // HAVE_SSSE3
#if HAVE_AVX
-#if CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(AVX, VP9QuantizeTest,
::testing::Values(make_tuple(&vpx_quantize_b_avx,
&vpx_quantize_b_c,
make_tuple(&vpx_quantize_b_32x32_avx,
&vpx_quantize_b_32x32_c,
VPX_BITS_8, 32, false)));
-#else
-INSTANTIATE_TEST_CASE_P(AVX, VP9QuantizeTest,
- ::testing::Values(make_tuple(&vpx_quantize_b_avx,
- &vpx_quantize_b_c,
- VPX_BITS_8, 16, false)));
-// TODO(webm:1448): lowbd truncates results in C.
-INSTANTIATE_TEST_CASE_P(DISABLED_AVX, VP9QuantizeTest,
- ::testing::Values(make_tuple(&vpx_quantize_b_32x32_avx,
- &vpx_quantize_b_32x32_c,
- VPX_BITS_8, 32, false)));
-#endif // CONFIG_VP9_HIGHBITDEPTH
#endif // HAVE_AVX
#if ARCH_X86_64 && HAVE_AVX2
::testing::Values(make_tuple(&QuantFPWrapper<vp9_quantize_fp_avx2>,
&QuantFPWrapper<quantize_fp_nz_c>, VPX_BITS_8,
16, true)));
-#endif // HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH
+#endif // ARCH_X86_64 && HAVE_AVX2
// TODO(webm:1448): dqcoeff is not handled correctly in HBD builds.
#if HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH
#include "./vpx_dsp_rtcd.h"
#include "vpx_dsp/quantize.h"
+#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_mem/vpx_mem.h"
void vpx_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs, int skip_block,
15;
qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
+#if (ARCH_X86 || ARCH_X86_64) && !CONFIG_VP9_HIGHBITDEPTH
+ // When tran_low_t is only 16 bits, dqcoeff can exceed its range. Rather
+ // than truncating with a cast, saturate the value. This is easier to
+ // implement on x86 and preserves the sign of the value.
+ dqcoeff_ptr[rc] =
+ clamp(qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2, INT16_MIN, INT16_MAX);
+#else
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2;
+#endif // (ARCH_X86 || ARCH_X86_64) && !CONFIG_VP9_HIGHBITDEPTH
if (tmp) eob = idx_arr[i];
}
// Un-sign to bias rounding like C.
const __m128i coeff = _mm_abs_epi16(qcoeff);
-#if CONFIG_VP9_HIGHBITDEPTH
const __m128i sign_0 = _mm_unpacklo_epi16(zero, qcoeff);
const __m128i sign_1 = _mm_unpackhi_epi16(zero, qcoeff);
dqcoeff32_0 = _mm_sign_epi32(dqcoeff32_0, sign_0);
dqcoeff32_1 = _mm_sign_epi32(dqcoeff32_1, sign_1);
+#if CONFIG_VP9_HIGHBITDEPTH
_mm_store_si128((__m128i *)(dqcoeff), dqcoeff32_0);
_mm_store_si128((__m128i *)(dqcoeff + 4), dqcoeff32_1);
#else
- __m128i dqcoeff16 = _mm_mullo_epi16(coeff, dequant);
- (void)zero;
-
- dqcoeff16 = _mm_srli_epi16(dqcoeff16, 1);
-
- dqcoeff16 = _mm_sign_epi16(dqcoeff16, qcoeff);
-
- _mm_store_si128((__m128i *)(dqcoeff), dqcoeff16);
+ _mm_store_si128((__m128i *)(dqcoeff),
+ _mm_packs_epi32(dqcoeff32_0, dqcoeff32_1));
#endif // CONFIG_VP9_HIGHBITDEPTH
}