From d51d3934f590573e5be5178b17463d2cbc2ddc37 Mon Sep 17 00:00:00 2001 From: Luca Barbato Date: Wed, 26 Apr 2017 16:31:11 +0000 Subject: [PATCH] ppc: Add convolve_avg Change-Id: Ib203c444c708f42072e38301ee3db97b5b53d014 --- test/convolve_test.cc | 2 +- vpx_dsp/ppc/vpx_convolve_vsx.c | 75 ++++++++++++++++++++++++++++++++++ vpx_dsp/vpx_dsp_rtcd_defs.pl | 2 +- 3 files changed, 77 insertions(+), 2 deletions(-) diff --git a/test/convolve_test.cc b/test/convolve_test.cc index 950a28d2b..bb009740a 100644 --- a/test/convolve_test.cc +++ b/test/convolve_test.cc @@ -1224,7 +1224,7 @@ INSTANTIATE_TEST_CASE_P(MSA, ConvolveTest, #if HAVE_VSX const ConvolveFunctions convolve8_vsx( - vpx_convolve_copy_vsx, vpx_convolve_avg_c, vpx_convolve8_horiz_c, + vpx_convolve_copy_vsx, vpx_convolve_avg_vsx, vpx_convolve8_horiz_c, vpx_convolve8_avg_horiz_c, vpx_convolve8_vert_c, vpx_convolve8_avg_vert_c, vpx_convolve8_c, vpx_convolve8_avg_c, vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c, vpx_scaled_vert_c, vpx_scaled_avg_vert_c, diff --git a/vpx_dsp/ppc/vpx_convolve_vsx.c b/vpx_dsp/ppc/vpx_convolve_vsx.c index 32cdbb4d0..670de8118 100644 --- a/vpx_dsp/ppc/vpx_convolve_vsx.c +++ b/vpx_dsp/ppc/vpx_convolve_vsx.c @@ -83,3 +83,78 @@ void vpx_convolve_copy_vsx(const uint8_t *src, ptrdiff_t src_stride, } } } + /* avg_w16: for each of h rows, loads one vector at offset 0 from src and one from dst, combines them with vec_avg, stores the result back to dst, then advances both pointers by their strides. NOTE(review): vec_avg is presumably the rounded unsigned byte average; confirm against the AltiVec reference. */ +static inline void avg_w16(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, int32_t h) { + int i; + + for (i = h; i--;) { + const uint8x16_t v = vec_avg(vec_vsx_ld(0, src), vec_vsx_ld(0, dst)); + vec_vsx_st(v, 0, dst); + src += src_stride; + dst += dst_stride; + } +} + /* avg_w32: same row loop as avg_w16, but handles two vectors per row at byte offsets 0 and 16; both averages are computed before either store to dst. */ +static inline void avg_w32(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, int32_t h) { + int i; + + for (i = h; i--;) { + const uint8x16_t v0 = vec_avg(vec_vsx_ld(0, src), vec_vsx_ld(0, dst)); + const uint8x16_t v1 = vec_avg(vec_vsx_ld(16, src), vec_vsx_ld(16, dst)); + vec_vsx_st(v0, 0, dst); + vec_vsx_st(v1, 16, dst); + src += src_stride; + dst += dst_stride; + } +} 
+ /* avg_w64: same row loop as the narrower helpers, but handles four vectors per row at byte offsets 0, 16, 32 and 48; all four averages are computed before any store to dst. */ +static inline void avg_w64(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, int32_t h) { + int i; + + for (i = h; i--;) { + const uint8x16_t v0 = vec_avg(vec_vsx_ld(0, src), vec_vsx_ld(0, dst)); + const uint8x16_t v1 = vec_avg(vec_vsx_ld(16, src), vec_vsx_ld(16, dst)); + const uint8x16_t v2 = vec_avg(vec_vsx_ld(32, src), vec_vsx_ld(32, dst)); + const uint8x16_t v3 = vec_avg(vec_vsx_ld(48, src), vec_vsx_ld(48, dst)); + vec_vsx_st(v0, 0, dst); + vec_vsx_st(v1, 16, dst); + vec_vsx_st(v2, 32, dst); + vec_vsx_st(v3, 48, dst); + src += src_stride; + dst += dst_stride; + } +} + /* vpx_convolve_avg_vsx: averages the src block into dst in place. Dispatches on w: widths 16, 32 and 64 go to the matching avg_w16, avg_w32 or avg_w64 helper; any other width is forwarded unchanged to vpx_convolve_avg_c. The filter arguments are not used by the vector paths (hence the void casts) and are only passed through to the C fallback. */ +void vpx_convolve_avg_vsx(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, + const int16_t *filter_x, int32_t filter_x_stride, + const int16_t *filter_y, int32_t filter_y_stride, + int32_t w, int32_t h) { + (void)filter_x; + (void)filter_y; + (void)filter_x_stride; + (void)filter_y_stride; + + switch (w) { + case 16: { + avg_w16(src, src_stride, dst, dst_stride, h); + break; + } + case 32: { + avg_w32(src, src_stride, dst, dst_stride, h); + break; + } + case 64: { + avg_w64(src, src_stride, dst, dst_stride, h); + break; + } + default: { /* no vector helper for this width: defer to the C implementation with all original arguments */ + vpx_convolve_avg_c(src, src_stride, dst, dst_stride, filter_x, + filter_x_stride, filter_y, filter_y_stride, w, h); + break; + } + } +} diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl index 0a9327ba6..a93e95ebe 100644 --- a/vpx_dsp/vpx_dsp_rtcd_defs.pl +++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl @@ -335,7 +335,7 @@ add_proto qw/void vpx_convolve_copy/, "const uint8_t *src, ptrdiff_t src_stride, specialize qw/vpx_convolve_copy neon dspr2 msa sse2 vsx/; add_proto qw/void vpx_convolve_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"; -specialize qw/vpx_convolve_avg neon dspr2 msa sse2/; +specialize qw/vpx_convolve_avg neon dspr2 msa sse2 vsx/; add_proto 
qw/void vpx_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"; specialize qw/vpx_convolve8 sse2 ssse3 avx2 neon dspr2 msa/; -- 2.40.0