From: Luca Barbato Date: Sun, 9 Apr 2017 00:09:56 +0000 (+0000) Subject: ppc: d63 predictor 32x32 X-Git-Tag: v1.7.0~539 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=92e33c7b31f4952f17c98810418d6deee4e34d2e;p=libvpx ppc: d63 predictor 32x32 About 10x faster. Change-Id: If7d0645f75c5d7deb9751edd0bf47e2f9068e9e7 --- diff --git a/test/test_intra_pred_speed.cc b/test/test_intra_pred_speed.cc index e578d430f..08dd3148c 100644 --- a/test/test_intra_pred_speed.cc +++ b/test/test_intra_pred_speed.cc @@ -327,7 +327,8 @@ INTRA_PRED_TEST(VSX, TestIntraPred32, vpx_dc_predictor_32x32_vsx, vpx_dc_left_predictor_32x32_vsx, vpx_dc_top_predictor_32x32_vsx, vpx_dc_128_predictor_32x32_vsx, vpx_v_predictor_32x32_vsx, vpx_h_predictor_32x32_vsx, vpx_d45_predictor_32x32_vsx, NULL, - NULL, NULL, NULL, NULL, vpx_tm_predictor_32x32_vsx) + NULL, NULL, NULL, vpx_d63_predictor_32x32_vsx, + vpx_tm_predictor_32x32_vsx) #endif // HAVE_VSX // ----------------------------------------------------------------------------- diff --git a/vpx_dsp/ppc/intrapred_vsx.c b/vpx_dsp/ppc/intrapred_vsx.c index 5a00c3deb..2006539f8 100644 --- a/vpx_dsp/ppc/intrapred_vsx.c +++ b/vpx_dsp/ppc/intrapred_vsx.c @@ -562,3 +562,33 @@ void vpx_d63_predictor_16x16_vsx(uint8_t *dst, ptrdiff_t stride, row1 = vec_perm(row1, above_right, sl1); } } + +void vpx_d63_predictor_32x32_vsx(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + const uint8x16_t a0 = vec_vsx_ld(0, above); + const uint8x16_t a1 = vec_vsx_ld(16, above); + const uint8x16_t a2 = vec_vsx_ld(32, above); + const uint8x16_t above_right = vec_splat(a2, 0); + const uint8x16_t b0 = vec_perm(a0, a1, sl1); + const uint8x16_t b1 = vec_perm(a1, above_right, sl1); + const uint8x16_t c0 = vec_perm(b0, b1, sl1); + const uint8x16_t c1 = vec_perm(b1, above_right, sl1); + uint8x16_t row0_0 = vec_avg(a0, b0); + uint8x16_t row0_1 = vec_avg(a1, b1); + uint8x16_t row1_0 = avg3(a0, b0, c0); + uint8x16_t row1_1 = avg3(a1, b1, c1); + int i; + (void)left; + + for (i = 0; i < 16; i++) { + vec_vsx_st(row0_0, 0, dst); + vec_vsx_st(row0_1, 16, dst); + vec_vsx_st(row1_0, 0, dst + stride); + vec_vsx_st(row1_1, 16, dst + stride); + dst += stride * 2; + row0_0 = vec_perm(row0_0, row0_1, sl1); + row0_1 = vec_perm(row0_1, above_right, sl1); + row1_0 = vec_perm(row1_0, row1_1, sl1); + row1_1 = vec_perm(row1_1, above_right, sl1); + } +} diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl index 8c65c17bd..affa9b143 100644 --- a/vpx_dsp/vpx_dsp_rtcd_defs.pl +++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl @@ -154,7 +154,7 @@ add_proto qw/void vpx_d45_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, c specialize qw/vpx_d45_predictor_32x32 neon ssse3 vsx/; add_proto qw/void vpx_d63_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_d63_predictor_32x32 ssse3/; +specialize qw/vpx_d63_predictor_32x32 ssse3 vsx/; add_proto qw/void vpx_h_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; specialize qw/vpx_h_predictor_32x32 neon msa sse2 vsx/;