From 659c2c98e1d14987c75538bfc80ab09d0c3729fc Mon Sep 17 00:00:00 2001
From: Yi Luo
Date: Wed, 23 Mar 2016 12:10:52 -0700
Subject: [PATCH] Misc. updates for highbd changes

- Use Makefile to control the build for highbd_fwd_txfm_sse4.c.
- Fixed hybrid transform (HT) types due to recent update.
- Added new unit test cases for highbd HT.

Change-Id: Ifd768a9b429a8c21ed40c1de8152fb5ac71e2f90
---
Reviewer notes (text between "---" and the diffstat is ignored by git-am):
- test/vp10_fht4x4_test.cc, !CONFIG_EXT_TX branch: the fourth VPX_BITS_10
  tuple ended with ")));", which closed ::testing::Values() and
  INSTANTIATE_TEST_CASE_P() before the VPX_BITS_12 tuples. It now ends with
  ")," so all eight tuples belong to the same parameter list.
- The template argument lists of std::tr1::tuple and
  ::testing::TestWithParam were missing. HighbdHt4x4Param is spelled out to
  match the five GET_PARAM() slots read in SetUp() and the make_tuple()
  arguments. TestWithParam<Ht4x4Param> on the unchanged context line is
  assumed from the naming of the existing test -- verify against the tree.
- Line breaks and indentation were reconstructed; hunk line counts match the
  "@@" headers, but whitespace inside context lines should be checked
  against the target files before applying.

 test/vp10_fht4x4_test.cc                | 135 ++++++++++++++++++++++++
 vp10/encoder/x86/highbd_fwd_txfm_sse4.c |  18 ++--
 vp10/vp10cx.mk                          |   2 +
 3 files changed, 143 insertions(+), 12 deletions(-)

diff --git a/test/vp10_fht4x4_test.cc b/test/vp10_fht4x4_test.cc
index bee1a0cd6..a5f64fce6 100644
--- a/test/vp10_fht4x4_test.cc
+++ b/test/vp10_fht4x4_test.cc
@@ -34,6 +34,19 @@ void fht4x4_ref(const int16_t *in, tran_low_t *out, int stride,
   vp10_fht4x4_c(in, out, stride, tx_type);
 }
 
+#if CONFIG_VP9_HIGHBITDEPTH
+typedef void (*IhighbdHtFunc)(const tran_low_t *in, uint8_t *out, int stride,
+                              int tx_type, int bd);
+
+typedef std::tr1::tuple<FhtFunc, IhighbdHtFunc, int, vpx_bit_depth_t, int>
+HighbdHt4x4Param;
+
+void highbe_fht4x4_ref(const int16_t *in, tran_low_t *out, int stride,
+                       int tx_type) {
+  vp10_highbd_fht4x4_c(in, out, stride, tx_type);
+}
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
 class VP10Trans4x4HT
     : public libvpx_test::TransformTestBase,
       public ::testing::TestWithParam<Ht4x4Param> {
@@ -69,6 +82,43 @@ TEST_P(VP10Trans4x4HT, CoeffCheck) {
   RunCoeffCheck();
 }
 
+#if CONFIG_VP9_HIGHBITDEPTH
+class VP10HighbdTrans4x4HT
+    : public libvpx_test::TransformTestBase,
+      public ::testing::TestWithParam<HighbdHt4x4Param> {
+ public:
+  virtual ~VP10HighbdTrans4x4HT() {}
+
+  virtual void SetUp() {
+    fwd_txfm_ = GET_PARAM(0);
+    inv_txfm_ = GET_PARAM(1);
+    tx_type_ = GET_PARAM(2);
+    pitch_ = 4;
+    fwd_txfm_ref = highbe_fht4x4_ref;
+    bit_depth_ = GET_PARAM(3);
+    mask_ = (1 << bit_depth_) - 1;
+    num_coeffs_ = GET_PARAM(4);
+  }
+  virtual void TearDown() { libvpx_test::ClearSystemState(); }
+
+ protected:
+  void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) {
+    fwd_txfm_(in, out, stride, tx_type_);
+  }
+
+  void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) {
+    inv_txfm_(out, dst, stride, tx_type_, bit_depth_);
+  }
+
+  FhtFunc fwd_txfm_;
+  IhighbdHtFunc inv_txfm_;
+};
+
+TEST_P(VP10HighbdTrans4x4HT, HighbdCoeffCheck) {
+  RunCoeffCheck();
+}
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
 using std::tr1::make_tuple;
 
 #if HAVE_SSE2
@@ -106,4 +156,89 @@ INSTANTIATE_TEST_CASE_P(
 #endif  // !CONFIG_EXT_TX
 #endif  // HAVE_SSE2
 
+#if HAVE_SSE4_1 && CONFIG_VP9_HIGHBITDEPTH
+INSTANTIATE_TEST_CASE_P(
+    SSE4_1, VP10HighbdTrans4x4HT,
+    ::testing::Values(
+#if !CONFIG_EXT_TX
+        make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 0,
+                   VPX_BITS_10, 16),
+        make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 1,
+                   VPX_BITS_10, 16),
+        make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 2,
+                   VPX_BITS_10, 16),
+        make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 3,
+                   VPX_BITS_10, 16),
+        make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 0,
+                   VPX_BITS_12, 16),
+        make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 1,
+                   VPX_BITS_12, 16),
+        make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 2,
+                   VPX_BITS_12, 16),
+        make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 3,
+                   VPX_BITS_12, 16)));
+#else
+        make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 0,
+                   VPX_BITS_10, 16),
+        make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 1,
+                   VPX_BITS_10, 16),
+        make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 2,
+                   VPX_BITS_10, 16),
+        make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 3,
+                   VPX_BITS_10, 16),
+        make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 4,
+                   VPX_BITS_10, 16),
+        make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 5,
+                   VPX_BITS_10, 16),
+        make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 6,
+                   VPX_BITS_10, 16),
+        make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 7,
+                   VPX_BITS_10, 16),
+        make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 8,
+                   VPX_BITS_10, 16),
+        make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 10,
+                   VPX_BITS_10, 16),
+        make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 11,
+                   VPX_BITS_10, 16),
+        make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 12,
+                   VPX_BITS_10, 16),
+        make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 13,
+                   VPX_BITS_10, 16),
+        make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 14,
+                   VPX_BITS_10, 16),
+        make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 15,
+                   VPX_BITS_10, 16),
+        make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 0,
+                   VPX_BITS_12, 16),
+        make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 1,
+                   VPX_BITS_12, 16),
+        make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 2,
+                   VPX_BITS_12, 16),
+        make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 3,
+                   VPX_BITS_12, 16),
+        make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 4,
+                   VPX_BITS_12, 16),
+        make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 5,
+                   VPX_BITS_12, 16),
+        make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 6,
+                   VPX_BITS_12, 16),
+        make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 7,
+                   VPX_BITS_12, 16),
+        make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 8,
+                   VPX_BITS_12, 16),
+        make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 10,
+                   VPX_BITS_12, 16),
+        make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 11,
+                   VPX_BITS_12, 16),
+        make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 12,
+                   VPX_BITS_12, 16),
+        make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 13,
+                   VPX_BITS_12, 16),
+        make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 14,
+                   VPX_BITS_12, 16),
+        make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 15,
+                   VPX_BITS_12, 16)));
+#endif  // !CONFIG_EXT_TX
+#endif  // HAVE_SSE4_1 && CONFIG_VP9_HIGHBITDEPTH
+
 }  // namespace
diff --git a/vp10/encoder/x86/highbd_fwd_txfm_sse4.c b/vp10/encoder/x86/highbd_fwd_txfm_sse4.c
index 3dc0e56d9..5fa4fc8dc 100644
--- a/vp10/encoder/x86/highbd_fwd_txfm_sse4.c
+++ b/vp10/encoder/x86/highbd_fwd_txfm_sse4.c
@@ -16,9 +16,6 @@
 #include "vpx_dsp/txfm_common.h"
 #include "vpx_ports/mem.h"
 
-
-#if CONFIG_VP9_HIGHBITDEPTH
-
 static INLINE void load_buffer_4x4(const int16_t *input, __m128i *in,
                                    int stride, int flipud, int fliplr) {
   const __m128i k__nonzero_bias_a = _mm_setr_epi32(0, 1, 1, 1);
@@ -193,13 +190,12 @@ void vp10_highbd_fht4x4_sse4_1(const int16_t *input, tran_low_t *output,
     case FLIPADST_ADST:
       vp10_highbd_fht4x4_c(input, output, stride, tx_type);
       break;
-    case DST_DST:
-    case DCT_DST:
-    case DST_DCT:
-    case DST_ADST:
-    case ADST_DST:
-    case DST_FLIPADST:
-    case FLIPADST_DST:
+    case V_DCT:
+    case H_DCT:
+    case V_ADST:
+    case H_ADST:
+    case V_FLIPADST:
+    case H_FLIPADST:
       vp10_highbd_fht4x4_c(input, output, stride, tx_type);
       break;
 #endif  // CONFIG_EXT_TX
@@ -207,5 +203,3 @@ void vp10_highbd_fht4x4_sse4_1(const int16_t *input, tran_low_t *output,
       assert(0);
   }
 }
-
-#endif  // CONFIG_VP9_HIGHBITDEPTH
diff --git a/vp10/vp10cx.mk b/vp10/vp10cx.mk
index 8b2d0d747..810005ce0 100644
--- a/vp10/vp10cx.mk
+++ b/vp10/vp10cx.mk
@@ -112,7 +112,9 @@ endif
 
 VP10_CX_SRCS-$(HAVE_SSE2) += encoder/x86/dct_sse2.c
 VP10_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/dct_ssse3.c
+ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
 VP10_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/highbd_fwd_txfm_sse4.c
+endif
 
 ifeq ($(CONFIG_VP9_TEMPORAL_DENOISING),yes)
 VP10_CX_SRCS-$(HAVE_SSE2) += encoder/x86/denoiser_sse2.c
-- 
2.40.0