From: Johann Date: Mon, 30 Jan 2017 19:39:02 +0000 (-0800) Subject: satd highbd sse2: use tran_low_t for coeff X-Git-Tag: v1.7.0~758^2~1 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=2ba383474dd0f9a2f4abc4d0ef3af434153cdbeb;p=libvpx satd highbd sse2: use tran_low_t for coeff BUG=webm:1365 Change-Id: I013659f6b9fbf9cc52ab840eae520fe0b5f883fb --- diff --git a/test/avg_test.cc b/test/avg_test.cc index b908281c1..09cefe816 100644 --- a/test/avg_test.cc +++ b/test/avg_test.cc @@ -348,16 +348,12 @@ INSTANTIATE_TEST_CASE_P( make_tuple(64, &vpx_int_pro_col_sse2, &vpx_int_pro_col_c))); -// TODO(jingning): Remove the highbitdepth flag once the SIMD functions are -// in place. -#if !CONFIG_VP9_HIGHBITDEPTH INSTANTIATE_TEST_CASE_P(SSE2, SatdTest, ::testing::Values(make_tuple(16, &vpx_satd_sse2), make_tuple(64, &vpx_satd_sse2), make_tuple(256, &vpx_satd_sse2), make_tuple(1024, &vpx_satd_sse2))); #endif -#endif #if HAVE_NEON INSTANTIATE_TEST_CASE_P( @@ -383,6 +379,8 @@ INSTANTIATE_TEST_CASE_P( make_tuple(64, &vpx_int_pro_col_neon, &vpx_int_pro_col_c))); +// TODO(jingning): Remove the highbitdepth flag once the SIMD functions are +// in place. 
#if !CONFIG_VP9_HIGHBITDEPTH INSTANTIATE_TEST_CASE_P(NEON, SatdTest, ::testing::Values(make_tuple(16, &vpx_satd_neon), diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl index 3cb2011b8..347e341cd 100644 --- a/vpx_dsp/vpx_dsp_rtcd_defs.pl +++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl @@ -894,7 +894,7 @@ if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") { specialize qw/vpx_hadamard_16x16/; add_proto qw/int vpx_satd/, "const tran_low_t *coeff, int length"; - specialize qw/vpx_satd/; + specialize qw/vpx_satd sse2/; } else { add_proto qw/void vpx_hadamard_8x8/, "const int16_t *src_diff, int src_stride, int16_t *coeff"; specialize qw/vpx_hadamard_8x8 sse2 neon msa/, "$ssse3_x86_64"; diff --git a/vpx_dsp/x86/avg_intrin_sse2.c b/vpx_dsp/x86/avg_intrin_sse2.c index b0a104bad..cc6935d59 100644 --- a/vpx_dsp/x86/avg_intrin_sse2.c +++ b/vpx_dsp/x86/avg_intrin_sse2.c @@ -11,6 +11,8 @@ #include <emmintrin.h> #include "./vpx_dsp_rtcd.h" +#include "vpx/vpx_integer.h" +#include "vpx_dsp/x86/fdct.h" #include "vpx_ports/mem.h" void vpx_minmax_8x8_sse2(const uint8_t *s, int p, const uint8_t *d, int dp, @@ -283,13 +285,13 @@ void vpx_hadamard_16x16_sse2(int16_t const *src_diff, int src_stride, } } -int vpx_satd_sse2(const int16_t *coeff, int length) { +int vpx_satd_sse2(const tran_low_t *coeff, int length) { int i; const __m128i zero = _mm_setzero_si128(); __m128i accum = zero; for (i = 0; i < length; i += 8) { - const __m128i src_line = _mm_load_si128((const __m128i *)coeff); + const __m128i src_line = load_tran_low(coeff); const __m128i inv = _mm_sub_epi16(zero, src_line); const __m128i abs = _mm_max_epi16(src_line, inv); // abs(src_line) const __m128i abs_lo = _mm_unpacklo_epi16(abs, zero);