granicus.if.org Git - libvpx/commitdiff
satd highbd sse2: use tran_low_t for coeff
author: Johann <johannkoenig@google.com>
Mon, 30 Jan 2017 19:39:02 +0000 (11:39 -0800)
committer: Johann <johannkoenig@google.com>
Wed, 1 Feb 2017 19:55:16 +0000 (11:55 -0800)
BUG=webm:1365

Change-Id: I013659f6b9fbf9cc52ab840eae520fe0b5f883fb

test/avg_test.cc
vpx_dsp/vpx_dsp_rtcd_defs.pl
vpx_dsp/x86/avg_intrin_sse2.c

diff --git a/test/avg_test.cc b/test/avg_test.cc
index b908281c1304ffb9226d131c2de1ae09006536f8..09cefe816c45e686d5894cc44780ffbfbf91166d 100644 (file)
@@ -348,16 +348,12 @@ INSTANTIATE_TEST_CASE_P(
                       make_tuple(64, &vpx_int_pro_col_sse2,
                                  &vpx_int_pro_col_c)));
 
-// TODO(jingning): Remove the highbitdepth flag once the SIMD functions are
-// in place.
-#if !CONFIG_VP9_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(SSE2, SatdTest,
                         ::testing::Values(make_tuple(16, &vpx_satd_sse2),
                                           make_tuple(64, &vpx_satd_sse2),
                                           make_tuple(256, &vpx_satd_sse2),
                                           make_tuple(1024, &vpx_satd_sse2)));
 #endif
-#endif
 
 #if HAVE_NEON
 INSTANTIATE_TEST_CASE_P(
@@ -383,6 +379,8 @@ INSTANTIATE_TEST_CASE_P(
                       make_tuple(64, &vpx_int_pro_col_neon,
                                  &vpx_int_pro_col_c)));
 
+// TODO(jingning): Remove the highbitdepth flag once the SIMD functions are
+// in place.
 #if !CONFIG_VP9_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(NEON, SatdTest,
                         ::testing::Values(make_tuple(16, &vpx_satd_neon),
diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl
index 3cb2011b83e69213603c84a35f70785952f9f545..347e341cd85786004b86d1241ed472be92c87137 100644 (file)
@@ -894,7 +894,7 @@ if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") {
     specialize qw/vpx_hadamard_16x16/;
 
     add_proto qw/int vpx_satd/, "const tran_low_t *coeff, int length";
-    specialize qw/vpx_satd/;
+    specialize qw/vpx_satd sse2/;
   } else {
     add_proto qw/void vpx_hadamard_8x8/, "const int16_t *src_diff, int src_stride, int16_t *coeff";
     specialize qw/vpx_hadamard_8x8 sse2 neon msa/, "$ssse3_x86_64";
diff --git a/vpx_dsp/x86/avg_intrin_sse2.c b/vpx_dsp/x86/avg_intrin_sse2.c
index b0a104bad066a3c2272fae7eca40e390d8b32053..cc6935d5957a5066f1675ddb0884e10a4c7f3be2 100644 (file)
@@ -11,6 +11,8 @@
 #include <emmintrin.h>
 
 #include "./vpx_dsp_rtcd.h"
+#include "vpx/vpx_integer.h"
+#include "vpx_dsp/x86/fdct.h"
 #include "vpx_ports/mem.h"
 
 void vpx_minmax_8x8_sse2(const uint8_t *s, int p, const uint8_t *d, int dp,
@@ -283,13 +285,13 @@ void vpx_hadamard_16x16_sse2(int16_t const *src_diff, int src_stride,
   }
 }
 
-int vpx_satd_sse2(const int16_t *coeff, int length) {
+int vpx_satd_sse2(const tran_low_t *coeff, int length) {
   int i;
   const __m128i zero = _mm_setzero_si128();
   __m128i accum = zero;
 
   for (i = 0; i < length; i += 8) {
-    const __m128i src_line = _mm_load_si128((const __m128i *)coeff);
+    const __m128i src_line = load_tran_low(coeff);
     const __m128i inv = _mm_sub_epi16(zero, src_line);
     const __m128i abs = _mm_max_epi16(src_line, inv);  // abs(src_line)
     const __m128i abs_lo = _mm_unpacklo_epi16(abs, zero);