From 143b21e362efadf432352e57666d8d9853492d88 Mon Sep 17 00:00:00 2001 From: Luca Barbato Date: Thu, 11 May 2017 03:58:34 +0000 Subject: [PATCH] ppc: Add get_mb_ss_vsx Change-Id: I1b54a7a5bb642e4b836d786ea1ae506eed025e3f --- test/variance_test.cc | 3 +++ vpx_dsp/ppc/variance_vsx.c | 17 +++++++++++++++++ vpx_dsp/vpx_dsp_rtcd_defs.pl | 2 +- 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/test/variance_test.cc b/test/variance_test.cc index e1c549aa8..4fc5cf5d6 100644 --- a/test/variance_test.cc +++ b/test/variance_test.cc @@ -1340,6 +1340,9 @@ INSTANTIATE_TEST_CASE_P( #endif // HAVE_MSA #if HAVE_VSX +INSTANTIATE_TEST_CASE_P(VSX, SumOfSquaresTest, + ::testing::Values(vpx_get_mb_ss_vsx)); + INSTANTIATE_TEST_CASE_P(VSX, VpxSseTest, ::testing::Values(SseParams(2, 2, &vpx_get4x4sse_cs_vsx))); diff --git a/vpx_dsp/ppc/variance_vsx.c b/vpx_dsp/ppc/variance_vsx.c index 8eb6f1a12..1efe2f005 100644 --- a/vpx_dsp/ppc/variance_vsx.c +++ b/vpx_dsp/ppc/variance_vsx.c @@ -38,6 +38,23 @@ uint32_t vpx_get4x4sse_cs_vsx(const uint8_t *a, int a_stride, const uint8_t *b, return distortion; } +// TODO(lu_zero): Unroll +uint32_t vpx_get_mb_ss_vsx(const int16_t *a) { + unsigned int i, sum = 0; + int32x4_t s = vec_splat_s32(0); + + for (i = 0; i < 256; i += 8) { + const int16x8_t v = vec_vsx_ld(0, a + i); + s = vec_msum(v, v, s); + } + + s = vec_splat(vec_sums(s, vec_splat_s32(0)), 3); + + vec_ste((uint32x4_t)s, 0, &sum); + + return sum; +} + void vpx_comp_avg_pred_vsx(uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride) { int i, j; diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl index 7368d90ea..56fe66542 100644 --- a/vpx_dsp/vpx_dsp_rtcd_defs.pl +++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl @@ -1171,7 +1171,7 @@ add_proto qw/unsigned int vpx_mse8x8/, "const uint8_t *src_ptr, int source_stri specialize qw/vpx_mse8x8 sse2 msa/; add_proto qw/unsigned int vpx_get_mb_ss/, "const int16_t *"; - specialize qw/vpx_get_mb_ss sse2 msa/; + specialize qw/vpx_get_mb_ss sse2 msa vsx/; add_proto qw/unsigned int vpx_get4x4sse_cs/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride"; specialize qw/vpx_get4x4sse_cs neon msa vsx/; -- 2.40.0